Release 4.10 mm/huge_memory.c

Directory: mm
/*
 *  Copyright (C) 2009  Red Hat, Inc.
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */


#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mmu_notifier.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/shrinker.h>
#include <linux/mm_inline.h>
#include <linux/swapops.h>
#include <linux/dax.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/pfn_t.h>
#include <linux/mman.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/migrate.h>
#include <linux/hashtable.h>
#include <linux/userfaultfd_k.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>

#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"

/*
 * By default transparent hugepage support is disabled, to avoid the risk of
 * increasing the memory footprint of applications without a guaranteed
 * benefit. When transparent hugepage support is enabled, it applies to all
 * mappings, and khugepaged scans all mappings.
 * Defrag is invoked by khugepaged hugepage allocations and by page faults
 * for all hugepage allocations.
 *
 * The initial value is assembled at build time: the "always" bit is set only
 * with CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS, the "madvise" bit only with
 * CONFIG_TRANSPARENT_HUGEPAGE_MADVISE, and the madvise-defrag,
 * khugepaged-defrag and use-zero-page bits are always set.
 */

unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS
	(1<<TRANSPARENT_HUGEPAGE_FLAG)|
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
	(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
	(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);


/*
 * Declared early so hugepage_init() can register it with register_shrinker();
 * NOTE(review): its callbacks are presumably filled in further down the file.
 */
static struct shrinker deferred_split_shrinker;


/*
 * Reference count guarding huge_zero_page. Zero means no page is installed.
 * get_huge_zero_page() starts it at 2 on installation: one reference for the
 * caller and one that only shrink_huge_zero_page_scan() drops.
 */
static atomic_t huge_zero_refcount;

/* The shared huge zero page, or NULL while none is installed. */
struct page *huge_zero_page __read_mostly;



/*
 * Take a reference on the global huge zero page, allocating and installing
 * the page first when none is currently installed.
 *
 * Returns the huge zero page, or NULL if a fresh page was needed and the
 * allocation failed.
 */
static struct page *get_huge_zero_page(void)
{
	struct page *new_page;

	for (;;) {
		/* Fast path: a page is installed, just bump its refcount. */
		if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
			return READ_ONCE(huge_zero_page);

		new_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
				       HPAGE_PMD_ORDER);
		if (!new_page) {
			count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
			return NULL;
		}
		count_vm_event(THP_ZERO_PAGE_ALLOC);

		preempt_disable();
		if (!cmpxchg(&huge_zero_page, NULL, new_page))
			break;	/* we installed it; preemption stays disabled */

		/* Somebody else installed a page first: drop ours and retry. */
		preempt_enable();
		__free_pages(new_page, compound_order(new_page));
	}

	/* We take additional reference here. It will be put back by shrinker */
	atomic_set(&huge_zero_refcount, 2);
	preempt_enable();
	return READ_ONCE(huge_zero_page);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 99 81.82% 6 54.55%
andrea arcangeli andrea arcangeli 14 11.57% 2 18.18%
yu zhao yu zhao 6 4.96% 1 9.09%
jason low jason low 1 0.83% 1 9.09%
aaron lu aaron lu 1 0.83% 1 9.09%
Total 121 100.00% 11 100.00%



static void put_huge_zero_page(void)
{
	/*
         * Counter should never go to zero here. Only shrinker can put
         * last reference.
         */
	BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 17 94.44% 2 66.67%
aaron lu aaron lu 1 5.56% 1 33.33%
Total 18 100.00% 3 100.00%



/*
 * Return the huge zero page on behalf of @mm, making sure @mm holds exactly
 * one reference on it (tracked by the MMF_HUGE_ZERO_PAGE bit in mm->flags).
 *
 * Returns NULL if the page could not be obtained.
 */
struct page *mm_get_huge_zero_page(struct mm_struct *mm)
{
	if (!test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) {
		if (!get_huge_zero_page())
			return NULL;

		/* Lost a race: the bit was set meanwhile, give our reference back. */
		if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
			put_huge_zero_page();
	}

	return READ_ONCE(huge_zero_page);
}

Contributors
Person Tokens Prop Commits CommitProp
aaron lu aaron lu 60 100.00% 1 100.00%
Total 60 100.00% 1 100.00%



/*
 * Release the huge zero page reference held by @mm, if MMF_HUGE_ZERO_PAGE
 * says it holds one. The flag itself is left untouched.
 */
void mm_put_huge_zero_page(struct mm_struct *mm)
{
	if (!test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
		return;

	put_huge_zero_page();
}

Contributors
Person Tokens Prop Commits CommitProp
aaron lu aaron lu 25 100.00% 1 100.00%
Total 25 100.00% 1 100.00%



/*
 * Shrinker ->count_objects callback: report HPAGE_PMD_NR freeable objects
 * when only one reference on the huge zero page remains, otherwise 0.
 */
static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	/* we can free zero page only if last reference remains */
	if (atomic_read(&huge_zero_refcount) != 1)
		return 0;

	return HPAGE_PMD_NR;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 27 87.10% 2 66.67%
glauber costa glauber costa 4 12.90% 1 33.33%
Total 31 100.00% 3 100.00%



static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
		struct page *zero_page = xchg(&huge_zero_page, NULL);
		BUG_ON(zero_page == NULL);
		__free_pages(zero_page, compound_order(zero_page));
		return HPAGE_PMD_NR;
	}

	return 0;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 44 63.77% 3 60.00%
glauber costa glauber costa 19 27.54% 1 20.00%
yu zhao yu zhao 6 8.70% 1 20.00%
Total 69 100.00% 5 100.00%


/*
 * Shrinker that frees the huge zero page once nobody else references it;
 * registered from hugepage_init().
 */
static struct shrinker huge_zero_page_shrinker = {
	.count_objects = shrink_huge_zero_page_count,
	.scan_objects = shrink_huge_zero_page_scan,
	.seeks = DEFAULT_SEEKS,
};

#ifdef CONFIG_SYSFS



/*
 * Parse a policy keyword written to a sysfs file and update up to three
 * mutually exclusive bits in transparent_hugepage_flags accordingly.
 *
 * @buf:      text written by userspace; accepted keywords are "always",
 *            "defer", "madvise" and "never", optionally followed by a single
 *            trailing newline
 * @count:    number of bytes written, returned unchanged on success
 * @enabled:  bit selected by "always"
 * @deferred: bit selected by "defer"
 * @req_madv: bit selected by "madvise"
 *
 * A caller that has no "defer" state passes the same bit for @enabled and
 * @deferred; "defer" is then rejected.
 *
 * The keyword must match in full. The previous memcmp()-based prefix
 * comparison accepted abbreviations ("a", "n"), keywords followed by
 * arbitrary garbage, and treated an empty write as "defer".
 * NOTE(review): sysfs_streq() relies on @buf being NUL-terminated, which the
 * sysfs/kernfs write path is expected to guarantee - confirm for any new
 * caller that does not go through sysfs.
 *
 * Returns @count on success or -EINVAL for an unrecognised keyword.
 */
static ssize_t triple_flag_store(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 const char *buf, size_t count,
				 enum transparent_hugepage_flag enabled,
				 enum transparent_hugepage_flag deferred,
				 enum transparent_hugepage_flag req_madv)
{
	if (sysfs_streq(buf, "defer")) {
		if (enabled == deferred)
			return -EINVAL;
		clear_bit(enabled, &transparent_hugepage_flags);
		clear_bit(req_madv, &transparent_hugepage_flags);
		set_bit(deferred, &transparent_hugepage_flags);
	} else if (sysfs_streq(buf, "always")) {
		clear_bit(deferred, &transparent_hugepage_flags);
		clear_bit(req_madv, &transparent_hugepage_flags);
		set_bit(enabled, &transparent_hugepage_flags);
	} else if (sysfs_streq(buf, "madvise")) {
		clear_bit(enabled, &transparent_hugepage_flags);
		clear_bit(deferred, &transparent_hugepage_flags);
		set_bit(req_madv, &transparent_hugepage_flags);
	} else if (sysfs_streq(buf, "never")) {
		clear_bit(enabled, &transparent_hugepage_flags);
		clear_bit(req_madv, &transparent_hugepage_flags);
		clear_bit(deferred, &transparent_hugepage_flags);
	} else {
		return -EINVAL;
	}

	return count;
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 178 71.49% 2 66.67%
mel gorman mel gorman 71 28.51% 1 33.33%
Total 249 100.00% 3 100.00%



/*
 * sysfs show handler for "enabled": print the three possible modes with the
 * active one in square brackets. "always" takes precedence over "madvise".
 */
static ssize_t enabled_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags))
		return sprintf(buf, "[always] madvise never\n");

	if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
		     &transparent_hugepage_flags))
		return sprintf(buf, "always [madvise] never\n");

	return sprintf(buf, "always madvise [never]\n");
}

Contributors
Person Tokens Prop Commits CommitProp
mel gorman mel gorman 37 56.06% 1 50.00%
andrea arcangeli andrea arcangeli 29 43.94% 1 50.00%
Total 66 100.00% 2 100.00%



/*
 * sysfs store handler for "enabled": update the always/madvise bits and, if
 * anything was consumed, let khugepaged start or stop to match.
 *
 * TRANSPARENT_HUGEPAGE_FLAG is passed for both the "always" and the "defer"
 * slot, so triple_flag_store() rejects "defer" for this file.
 *
 * Returns the number of bytes consumed, or a negative errno from parsing or
 * from start_stop_khugepaged().
 */
static ssize_t enabled_store(struct kobject *kobj,
			     struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	ssize_t ret;
	int err;

	ret = triple_flag_store(kobj, attr, buf, count,
				TRANSPARENT_HUGEPAGE_FLAG,
				TRANSPARENT_HUGEPAGE_FLAG,
				TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG);
	if (ret <= 0)
		return ret;

	err = start_stop_khugepaged();
	if (err)
		return err;

	return ret;
}
Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 65 91.55% 2 40.00%
kirill a. shutemov kirill a. shutemov 3 4.23% 2 40.00%
mel gorman mel gorman 3 4.23% 1 20.00%
Total 71 100.00% 5 100.00%


/* "enabled" sysfs file, mode 0644, backed by enabled_show()/enabled_store(). */
static struct kobj_attribute enabled_attr =
	__ATTR(enabled, 0644, enabled_show, enabled_store);



/*
 * Generic sysfs show helper: print "1\n" if bit @flag is set in
 * transparent_hugepage_flags and "0\n" otherwise.
 *
 * Returns the number of characters written to @buf.
 */
ssize_t single_hugepage_flag_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf,
				enum transparent_hugepage_flag flag)
{
	int is_set = !!test_bit(flag, &transparent_hugepage_flags);

	return sprintf(buf, "%d\n", is_set);
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 31 75.61% 1 33.33%
ben hutchings ben hutchings 9 21.95% 1 33.33%
kirill a. shutemov kirill a. shutemov 1 2.44% 1 33.33%
Total 41 100.00% 3 100.00%



/*
 * Generic sysfs store helper: parse @buf as a decimal number and set
 * (1) or clear (0) bit @flag in transparent_hugepage_flags.
 *
 * Returns @count on success, the kstrtoul() error if parsing fails, or
 * -EINVAL for any value other than 0 or 1.
 */
ssize_t single_hugepage_flag_store(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 const char *buf, size_t count,
				 enum transparent_hugepage_flag flag)
{
	unsigned long value;
	int ret;

	ret = kstrtoul(buf, 10, &value);
	if (ret < 0)
		return ret;

	switch (value) {
	case 0:
		clear_bit(flag, &transparent_hugepage_flags);
		break;
	case 1:
		set_bit(flag, &transparent_hugepage_flags);
		break;
	default:
		return -EINVAL;
	}

	return count;
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 51 57.30% 1 33.33%
ben hutchings ben hutchings 37 41.57% 1 33.33%
kirill a. shutemov kirill a. shutemov 1 1.12% 1 33.33%
Total 89 100.00% 3 100.00%

/*
 * Currently defrag only disables __GFP_NOWAIT for allocation. A blind
 * __GFP_REPEAT is too aggressive, it's never worth swapping tons of
 * memory just to allocate one more hugepage.
 */


/*
 * sysfs show handler for "defrag": print the four possible modes with the
 * active one in square brackets. The bits are checked in the order
 * direct ("always"), kswapd ("defer"), madvise; none set means "never".
 */
static ssize_t defrag_show(struct kobject *kobj,
			   struct kobj_attribute *attr, char *buf)
{
	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
		     &transparent_hugepage_flags))
		return sprintf(buf, "[always] defer madvise never\n");

	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
		     &transparent_hugepage_flags))
		return sprintf(buf, "always [defer] madvise never\n");

	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
		     &transparent_hugepage_flags))
		return sprintf(buf, "always defer [madvise] never\n");

	return sprintf(buf, "always defer madvise [never]\n");
}
Contributors
Person Tokens Prop Commits CommitProp
mel gorman mel gorman 55 65.48% 1 50.00%
andrea arcangeli andrea arcangeli 29 34.52% 1 50.00%
Total 84 100.00% 2 100.00%



/*
 * sysfs store handler for "defrag": hand the written keyword to
 * triple_flag_store() with the direct ("always"), kswapd ("defer") and
 * madvise defrag bits, and return its result unchanged.
 */
static ssize_t defrag_store(struct kobject *kobj,
			    struct kobj_attribute *attr,
			    const char *buf, size_t count)
{
	return triple_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
				 TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
				 TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG);
}
Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 38 90.48% 1 50.00%
mel gorman mel gorman 4 9.52% 1 50.00%
Total 42 100.00% 2 100.00%


/* "defrag" sysfs file, mode 0644, backed by defrag_show()/defrag_store(). */
static struct kobj_attribute defrag_attr =
	__ATTR(defrag, 0644, defrag_show, defrag_store);



/*
 * sysfs show handler for "use_zero_page": print the state of
 * TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG as "0\n" or "1\n".
 */
static ssize_t use_zero_page_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	return single_hugepage_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 32 100.00% 2 100.00%
Total 32 100.00% 2 100.00%



/*
 * sysfs store handler for "use_zero_page": accept "0" or "1" and clear or
 * set TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG accordingly.
 */
static ssize_t use_zero_page_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	return single_hugepage_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 38 100.00% 2 100.00%
Total 38 100.00% 2 100.00%


/* "use_zero_page" sysfs file, mode 0644. */
static struct kobj_attribute use_zero_page_attr =
	__ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);



/*
 * sysfs show handler for "hpage_pmd_size": print HPAGE_PMD_SIZE as an
 * unsigned decimal followed by a newline.
 */
static ssize_t hpage_pmd_size_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE);
}
Contributors
Person Tokens Prop Commits CommitProp
hugh dickins hugh dickins 30 100.00% 1 100.00%
Total 30 100.00% 1 100.00%


/* Read-only "hpage_pmd_size" sysfs file; __ATTR_RO ties it to hpage_pmd_size_show(). */
static struct kobj_attribute hpage_pmd_size_attr =
	__ATTR_RO(hpage_pmd_size);

#ifdef CONFIG_DEBUG_VM


/*
 * sysfs show handler for "debug_cow" (CONFIG_DEBUG_VM only): print the state
 * of TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG as "0\n" or "1\n".
 */
static ssize_t debug_cow_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	return single_hugepage_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG);
}
Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 31 96.88% 1 50.00%
kirill a. shutemov kirill a. shutemov 1 3.12% 1 50.00%
Total 32 100.00% 2 100.00%



/*
 * sysfs store handler for "debug_cow" (CONFIG_DEBUG_VM only): accept "0" or
 * "1" and clear or set TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG accordingly.
 */
static ssize_t debug_cow_store(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       const char *buf, size_t count)
{
	return single_hugepage_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG);
}
Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 37 97.37% 1 50.00%
kirill a. shutemov kirill a. shutemov 1 2.63% 1 50.00%
Total 38 100.00% 2 100.00%


/* "debug_cow" sysfs file, mode 0644; only built with CONFIG_DEBUG_VM. */
static struct kobj_attribute debug_cow_attr =
	__ATTR(debug_cow, 0644, debug_cow_show, debug_cow_store);
#endif /* CONFIG_DEBUG_VM */


/*
 * NULL-terminated list of the sysfs attributes exposed through
 * hugepage_attr_group. The shmem and debug entries are compiled in only
 * under their respective config options.
 */
static struct attribute *hugepage_attr[] = {
	&enabled_attr.attr,
	&defrag_attr.attr,
	&use_zero_page_attr.attr,
	&hpage_pmd_size_attr.attr,
#if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
	&shmem_enabled_attr.attr,
#endif
#ifdef CONFIG_DEBUG_VM
	&debug_cow_attr.attr,
#endif
	NULL,
};


/* Attribute group created on the "transparent_hugepage" kobject by hugepage_init_sysfs(). */
static struct attribute_group hugepage_attr_group = {
	.attrs = hugepage_attr,
};



/*
 * Create the "transparent_hugepage" kobject under mm_kobj and populate it
 * with the hugepage and khugepaged attribute groups.
 *
 * @hugepage_kobj: out parameter receiving the created kobject (NULL if the
 *                 creation itself failed)
 *
 * Returns 0 on success, -ENOMEM if the kobject cannot be created, or the
 * sysfs_create_group() error; everything set up so far is undone on failure.
 */
static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
	struct kobject *kobj;
	int err;

	kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
	*hugepage_kobj = kobj;
	if (unlikely(!kobj)) {
		pr_err("failed to create transparent hugepage kobject\n");
		return -ENOMEM;
	}

	err = sysfs_create_group(kobj, &hugepage_attr_group);
	if (err) {
		pr_err("failed to register transparent hugepage group\n");
		kobject_put(kobj);
		return err;
	}

	err = sysfs_create_group(kobj, &khugepaged_attr_group);
	if (err) {
		pr_err("failed to register transparent hugepage group\n");
		sysfs_remove_group(kobj, &hugepage_attr_group);
		kobject_put(kobj);
		return err;
	}

	return 0;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 70 57.85% 1 25.00%
andrea arcangeli andrea arcangeli 50 41.32% 2 50.00%
david rientjes david rientjes 1 0.83% 1 25.00%
Total 121 100.00% 4 100.00%



/*
 * Undo hugepage_init_sysfs(): remove both attribute groups in reverse
 * creation order, then drop the reference on the kobject.
 */
static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
{
	sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
	sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
	kobject_put(hugepage_kobj);
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 27 81.82% 1 33.33%
andrea arcangeli andrea arcangeli 6 18.18% 2 66.67%
Total 33 100.00% 3 100.00%

#else


/*
 * !CONFIG_SYSFS stub: nothing to create, always succeeds.
 * Note that *hugepage_kobj is left unwritten.
 */
static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
	return 0;
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 11 68.75% 2 66.67%
kirill a. shutemov kirill a. shutemov 5 31.25% 1 33.33%
Total 16 100.00% 3 100.00%



/* !CONFIG_SYSFS stub: nothing was created, so nothing to tear down. */
static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
{
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 6 54.55% 1 33.33%
andrea arcangeli andrea arcangeli 5 45.45% 2 66.67%
Total 11 100.00% 3 100.00%

#endif /* CONFIG_SYSFS */



/*
 * Boot-time initialisation of transparent hugepage support: sysfs files,
 * khugepaged, the two shrinkers and finally the khugepaged thread itself.
 *
 * Returns 0 on success (including the "small system" case where THP is
 * merely switched off), -EINVAL if the hardware lacks THP support, or the
 * error of whichever setup step failed, after unwinding the earlier steps.
 */
static int __init hugepage_init(void)
{
	struct kobject *hugepage_kobj;
	int err;

	if (!has_transparent_hugepage()) {
		transparent_hugepage_flags = 0;
		return -EINVAL;
	}

	/*
	 * hugepages can't be allocated by the buddy allocator
	 */
	MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER >= MAX_ORDER);
	/*
	 * we use page->mapping and page->index in second tail page
	 * as list_head: assuming THP order >= 2
	 */
	MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);

	err = hugepage_init_sysfs(&hugepage_kobj);
	if (err)
		return err;

	err = khugepaged_init();
	if (err)
		goto undo_sysfs;

	err = register_shrinker(&huge_zero_page_shrinker);
	if (err)
		goto undo_khugepaged;

	err = register_shrinker(&deferred_split_shrinker);
	if (err)
		goto undo_hzp_shrinker;

	/*
	 * By default disable transparent hugepages on smaller systems,
	 * where the extra memory used could hurt more than TLB overhead
	 * is likely to save.  The admin can still enable it through /sys.
	 */
	if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
		transparent_hugepage_flags = 0;
		return 0;
	}

	err = start_stop_khugepaged();
	if (err)
		goto undo_split_shrinker;

	return 0;

undo_split_shrinker:
	unregister_shrinker(&deferred_split_shrinker);
undo_hzp_shrinker:
	unregister_shrinker(&huge_zero_page_shrinker);
undo_khugepaged:
	khugepaged_destroy();
undo_sysfs:
	hugepage_exit_sysfs(hugepage_kobj);
	return err;
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 151 84.83% 1 33.33%
andrea arcangeli andrea arcangeli 27 15.17% 2 66.67%
Total 178 100.00% 3 100.00%


subsys_initcall(hugepage_init);



/*
 * Handler for the "transparent_hugepage=" boot parameter. Accepts exactly
 * "always", "madvise" or "never" and sets the always/madvise bits to match.
 *
 * Returns 1 when the value was recognised; otherwise logs a warning and
 * returns 0.
 */
static int __init setup_transparent_hugepage(char *str)
{
	if (str) {
		if (!strcmp(str, "always")) {
			set_bit(TRANSPARENT_HUGEPAGE_FLAG,
				&transparent_hugepage_flags);
			clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
				  &transparent_hugepage_flags);
			return 1;
		}
		if (!strcmp(str, "madvise")) {
			clear_bit(TRANSPARENT_HUGEPAGE_FLAG,
				  &transparent_hugepage_flags);
			set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
				&transparent_hugepage_flags);
			return 1;
		}
		if (!strcmp(str, "never")) {
			clear_bit(TRANSPARENT_HUGEPAGE_FLAG,
				  &transparent_hugepage_flags);
			clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
				  &transparent_hugepage_flags);
			return 1;
		}
	}

	/* Missing or unrecognised value. */
	pr_warn("transparent_hugepage= cannot parse, ignored\n");
	return 0;
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 97 70.80% 1 25.00%
andrea arcangeli andrea arcangeli 40 29.20% 3 75.00%
Total 137 100.00% 4 100.00%

__setup("transparent_hugepage=", setup_transparent_hugepage);



/*
 * Return @pmd made writable if @vma has VM_WRITE set, otherwise return it
 * unchanged.
 */
pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
	if (unlikely(!(vma->vm_flags & VM_WRITE)))
		return pmd;

	return pmd_mkwrite(pmd);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 30 88.24% 1 50.00%
ebru akagunduz ebru akagunduz 4 11.76% 1 50.00%
Total 34 100.00% 2 100.00%



/*
 * Return the list_head embedded in a compound page, given its head @page.
 * The storage is the ->mapping field of the second tail page (page[2]),
 * reinterpreted as a struct list_head.
 */
static inline struct list_head *page_deferred_list(struct page *page)
{
	/*
	 * ->lru in the tail pages is occupied by compound_head.
	 * Let's use ->mapping + ->index in the second tail page as list_head.
	 */
	return (struct list_head *)&page[2].mapping;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 25 86.21% 1 50.00%
ebru akagunduz ebru akagunduz 4 13.79% 1 50.00%
Total 29 100.00% 2 100.00%



/*
 * Prepare a freshly allocated compound @page for use as a transparent huge
 * page: initialise its embedded deferred list and install the THP
 * compound-page destructor.
 */
void prep_transhuge_page(struct page *page)
{
	/*
	 * we use page->mapping and page->index in second tail page
	 * as list_head: assuming THP order >= 2
	 */

	INIT_LIST_HEAD(page_deferred_list(page));
	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 22 84.62% 1 50.00%
ebru akagunduz ebru akagunduz 4 15.38% 1 50.00%
Total 26 100.00% 2 100.00%



/*
 * Find an unmapped area for a file mapping of @len bytes at file offset @off
 * such that the returned address is congruent to @off modulo @size.
 * NOTE(review): the mask arithmetic assumes @size is a power of two.
 *
 * The search asks for @len + @size bytes and then shifts the result forward
 * to the wanted alignment.
 *
 * Returns the aligned address, or 0 if the range does not cover at least one
 * full @size-aligned unit, if the padded length overflows, or if the
 * underlying search fails. 0 tells the caller to fall back.
 */
unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
		loff_t off, unsigned long flags, unsigned long size)
{
	loff_t off_end = off + len;
	loff_t off_align = round_up(off, size);
	unsigned long len_pad;
	unsigned long addr;

	/* The mapping must reach past the first aligned offset ... */
	if (off_end <= off_align)
		return 0;
	/* ... by at least one full unit. */
	if ((off_end - off_align) < size)
		return 0;

	len_pad = len + size;
	if (len_pad < len)
		return 0;
	if ((off + len_pad) < off)
		return 0;

	addr = current->mm->get_unmapped_area(filp, 0, len_pad,
					      off >> PAGE_SHIFT, flags);
	if (IS_ERR_VALUE(addr))
		return 0;

	/* Advance so that addr and off share the same offset within a unit. */
	return addr + ((off - addr) & (size - 1));
}

Contributors
Person Tokens Prop Commits CommitProp
toshi kani toshi kani 139 100.00% 1 100.00%
Total 139 100.00% 1 100.00%



/*
 * get_unmapped_area implementation that tries a PMD_SIZE-aligned placement
 * for DAX files when no address hint is given and CONFIG_FS_DAX_PMD is
 * enabled. In every other case, or when the aligned attempt yields nothing,
 * it defers to the mm's regular get_unmapped_area().
 */
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	loff_t off = (loff_t)pgoff << PAGE_SHIFT;

	if (!addr && IS_DAX(filp->f_mapping->host) &&
	    IS_ENABLED(CONFIG_FS_DAX_PMD)) {
		unsigned long aligned;

		aligned = __thp_get_unmapped_area(filp, len, off, flags,
						  PMD_SIZE);
		if (aligned)
			return aligned;
	}

	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
Contributors
Person Tokens Prop Commits CommitProp
toshi kani toshi kani 107 100.00% 1 100.00%
Total 107 100.00% 1 100.00%


EXPORT_SYMBOL_GPL(thp_get_unmapped_area);



/*
 * Map the already allocated huge @page at the faulting PMD of an anonymous
 * VMA: charge it to the memcg, allocate the page table to deposit, zero the
 * page and install the PMD under the PMD lock.
 *
 * The reference on @page is consumed: it is either mapped or put.
 *
 * Returns 0 when the page was mapped or when someone else populated the PMD
 * first, VM_FAULT_FALLBACK if the memcg charge failed, VM_FAULT_OOM if the
 * page table could not be allocated, or the handle_userfault() result when
 * the VMA is registered for missing-page userfaults.
 */
static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
		gfp_t gfp)
{
	struct vm_area_struct *vma = vmf->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	struct mem_cgroup *memcg;
	pgtable_t pgtable;
	pmd_t entry;
	int ret;

	VM_BUG_ON_PAGE(!PageCompound(page), page);

	if (mem_cgroup_try_charge(page, mm, gfp, &memcg, true)) {
		put_page(page);
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}

	pgtable = pte_alloc_one(mm, haddr);
	if (unlikely(!pgtable)) {
		mem_cgroup_cancel_charge(page, memcg, true);
		put_page(page);
		return VM_FAULT_OOM;
	}

	clear_huge_page(page, haddr, HPAGE_PMD_NR);
	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * clear_huge_page writes become visible before the set_pmd_at()
	 * write.
	 */
	__SetPageUptodate(page);

	vmf->ptl = pmd_lock(mm, vmf->pmd);

	/* Somebody populated the PMD while we were preparing: back out. */
	if (unlikely(!pmd_none(*vmf->pmd))) {
		spin_unlock(vmf->ptl);
		mem_cgroup_cancel_charge(page, memcg, true);
		put_page(page);
		pte_free(mm, pgtable);
		return 0;
	}

	/* Deliver the page fault to userland */
	if (userfaultfd_missing(vma)) {
		spin_unlock(vmf->ptl);
		mem_cgroup_cancel_charge(page, memcg, true);
		put_page(page);
		pte_free(mm, pgtable);
		ret = handle_userfault(vmf, VM_UFFD_MISSING);
		VM_BUG_ON(ret & VM_FAULT_FALLBACK);
		return ret;
	}

	entry = mk_huge_pmd(page, vma->vm_page_prot);
	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
	page_add_new_anon_rmap(page, vma, haddr, true);
	mem_cgroup_commit_charge(page, memcg, false, true);
	lru_cache_add_active_or_unevictable(page, vma);
	pgtable_trans_huge_deposit(mm, vmf->pmd, pgtable);
	set_pmd_at(mm, haddr, vmf->pmd, entry);
	add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
	atomic_long_inc(&mm->nr_ptes);
	spin_unlock(vmf->ptl);
	count_vm_event(THP_FAULT_ALLOC);

	return 0;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 279 71.91% 5 55.56%
andrea arcangeli andrea arcangeli 35 9.02% 1 11.11%
ebru akagunduz ebru akagunduz 33 8.51% 1 11.11%
li shaohua li shaohua 28 7.22% 1 11.11%
jan kara jan kara 13 3.35% 1 11.11%
Total 388 100.00% 9 100.00%

/*
 * If THP defrag is set to always then directly reclaim/compact as necessary
 * If set to defer then do only background reclaim/compact and defer to khugepaged
 * If set to madvise and the VMA is flagged then directly reclaim/compact
 * When direct reclaim/compact is allowed, don't retry except for flagged VMA's
 */


/*
 * Choose the gfp mask for a page-fault time huge page allocation in @vma
 * from the global defrag bits and from whether the VMA carries VM_HUGEPAGE.
 *
 * Precedence: the madvise bit on a VM_HUGEPAGE VMA, then the kswapd
 * ("defer") bit, then the direct ("always") bit; with none of them
 * applicable the light mask is used.
 */
static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
{
	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);

	if (vma_madvised &&
	    test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
		     &transparent_hugepage_flags))
		return GFP_TRANSHUGE;

	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
		     &transparent_hugepage_flags))
		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;

	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
		     &transparent_hugepage_flags)) {
		/* Only VM_HUGEPAGE VMAs are allowed to retry. */
		if (vma_madvised)
			return GFP_TRANSHUGE;
		return GFP_TRANSHUGE | __GFP_NORETRY;
	}

	return GFP_TRANSHUGE_LIGHT;
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 34 41.98% 1 33.33%
vlastimil babka vlastimil babka 29 35.80% 1 33.33%
kirill a. shutemov kirill a. shutemov 18 22.22% 1 33.33%
Total 81 100.00% 3 100.00%

/* Caller must hold page table lock. */


/*
 * Install a huge PMD pointing at @zero_page at @pmd, depositing @pgtable
 * (if non-NULL) for a later split and accounting it in mm->nr_ptes.
 * Caller must hold page table lock.
 *
 * Returns false without touching anything if @pmd is already populated,
 * true once the entry has been set.
 */
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
		struct page *zero_page)
{
	pmd_t entry;

	if (!pmd_none(*pmd))
		return false;

	entry = pmd_mkhuge(mk_pmd(zero_page, vma->vm_page_prot));
	if (pgtable)
		pgtable_trans_huge_deposit(mm, pmd, pgtable);
	set_pmd_at(mm, haddr, pmd, entry);
	atomic_long_inc(&mm->nr_ptes);

	return true;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 77 77.00% 1 50.00%
andrea arcangeli andrea arcangeli 23 23.00% 1 50.00%
Total 100 100.00% 2 100.00%



int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	gfp_t gfp;
	struct page *page;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;

	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
		return VM_FAULT_FALLBACK;
	if (unlikely(anon_vma_prepare(vma)))
		return VM_FAULT_OOM;
	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
		return VM_FAULT_OOM;
	if (!(vmf->flags & FAULT_FLAG_WRITE) &&
			!mm_forbids_zeropage(vma->vm_mm) &&
			transparent_hugepage_use_zero_page()) {
		pgtable_t pgtable;
		struct page *zero_page;
		bool set;
		int ret;
		pgtable = pte_alloc_one(vma->vm_mm, haddr);
		if (unlikely(!pgtable))
			return VM_FAULT_OOM;
		zero_page = mm_get_huge_zero_page(vma->vm_mm);
		if (unlikely(!zero_page)) {
			pte_free(vma->vm_mm, pgtable);
			count_vm_event(THP_FAULT_FALLBACK);
			return VM_FAULT_FALLBACK;
		}
		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
		ret = 0;
		set = false;
		if (pmd_none(*vmf->pmd)) {
			if (userfaultfd_missing(vma)) {
				spin_unlock(vmf->ptl);
				ret = handle_userfault(vmf, VM_UFFD_MISSING);
				VM_BUG_ON(ret & VM_FAULT_FALLBACK);
			} else {
				set_huge_zero_page(pgtable, vma->vm_mm, vma,
						   haddr, vmf->pmd, zero_page);
				spin_unlock(vmf->ptl);
				set = true;
			}
		} else
			spin_unlock(vmf->ptl);
		if (!set)
			pte_free(vma->vm_mm, pgtable);
		return ret;
	}
	gfp = alloc_hugepage_direct_gfpmask(vma);
	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
	if (unlikely(!page)) {
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}
	prep_transhuge_page(page);
	return __do_huge_pmd_anonymous_page(vmf, page, gfp);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 231 64.17% 5 45.45%
andrea arcangeli andrea arcangeli 95 26.39% 3 27.27%
jan kara jan kara 14 3.89% 1 9.09%
johannes weiner johannes weiner 14 3.89% 1 9.09%
aaron lu aaron lu 6 1.67% 1 9.09%
Total 360 100.00% 11 100.00%



/*
 * Build a huge PMD entry for @pfn with protection @prot and install it at
 * @pmd under the PMD lock. The entry is marked devmap when the pfn says so
 * and, for @write, young, dirty and (if the VMA allows it) writable.
 */
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl = pmd_lock(mm, pmd);
	pmd_t entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));

	if (pfn_t_devmap(pfn))
		entry = pmd_mkdevmap(entry);
	if (write)
		entry = maybe_pmd_mkwrite(pmd_mkyoung(pmd_mkdirty(entry)),
					  vma);

	set_pmd_at(mm, addr, pmd, entry);
	update_mmu_cache_pmd(vma, addr, pmd);
	spin_unlock(ptl);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 97 75.19% 5 50.00%
andrea arcangeli andrea arcangeli 20 15.50% 1 10.00%
johannes weiner johannes weiner 6 4.65% 1 10.00%
aneesh kumar aneesh kumar 5 3.88% 2 20.00%
gerald schaefer gerald schaefer 1 0.78% 1 10.00%
Total 129 100.00% 10 100.00%



/*
 * Insert a huge PMD mapping for a device @pfn into @vma at @addr.
 *
 * The VMA must be exactly one of VM_PFNMAP or VM_MIXEDMAP, a VM_PFNMAP VMA
 * must not be a COW mapping, and @pfn must be a devmap pfn; violations BUG.
 *
 * Returns VM_FAULT_SIGBUS if @addr lies outside @vma, VM_FAULT_NOPAGE once
 * the entry has been installed.
 */
int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
			pmd_t *pmd, pfn_t pfn, bool write)
{
	pgprot_t pgprot = vma->vm_page_prot;
	const unsigned long pfn_kinds = VM_PFNMAP|VM_MIXEDMAP;
	unsigned long vma_kind = vma->vm_flags & pfn_kinds;

	/*
	 * If we had pmd_special, we could avoid all these restrictions,
	 * but we need to be consistent with PTEs and architectures that
	 * can't support a 'special' bit.
	 */
	BUG_ON(!vma_kind);
	BUG_ON(vma_kind == pfn_kinds);
	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
	BUG_ON(!pfn_t_devmap(pfn));

	if (!(addr >= vma->vm_start && addr < vma->vm_end))
		return VM_FAULT_SIGBUS;

	track_pfn_insert(vma, &pgprot, pfn);
	insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);

	return VM_FAULT_NOPAGE;
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 108 76.60% 1 20.00%
mel gorman mel gorman 28 19.86% 1 20.00%
andrea arcangeli andrea arcangeli 4 2.84% 2 40.00%
borislav petkov borislav petkov 1 0.71% 1 20.00%
Total 141 100.00% 5 100.00%


EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);



/*
 * Mark the huge PMD covering @addr young and dirty, and refresh the MMU
 * cache if the access flags actually changed.
 */
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd)
{
	unsigned long haddr = addr & HPAGE_PMD_MASK;
	pmd_t touched;

	/*
	 * We should set the dirty bit only for FOLL_WRITE but for now
	 * the dirty bit in the pmd is meaningless.  And if the dirty
	 * bit will become meaningful and we'll only set it with
	 * FOLL_WRITE, an atomic set_bit will be required on the pmd to
	 * set the young bit, instead of the current set_pmd_at.
	 */
	touched = pmd_mkyoung(pmd_mkdirty(*pmd));
	if (!pmdp_set_access_flags(vma, haddr, pmd, touched, 1))
		return;

	update_mmu_cache_pmd(vma, addr, pmd);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 52 86.67% 4 50.00%
andrew morton andrew morton 4 6.67% 1 12.50%
matthew wilcox matthew wilcox 2 3.33% 1 12.50%
aneesh kumar aneesh kumar 2 3.33% 2 25.00%
Total 60 100.00% 8 100.00%



/*
 * Look up the page behind a devmap huge PMD for get_user_pages-style
 * callers. The PMD lock must be held.
 *
 * Returns the page with an extra reference on success; NULL if a write was
 * requested on a non-writable PMD or the PMD is not a present devmap entry;
 * ERR_PTR(-EEXIST) if the caller did not pass FOLL_GET; ERR_PTR(-EFAULT) if
 * no dev_pagemap covers the pfn.
 */
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, int flags)
{
	unsigned long pfn = pmd_pfn(*pmd);
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap;
	struct page *page;

	assert_spin_locked(pmd_lockptr(mm, pmd));

	/*
	 * When we COW a devmap PMD entry, we split it into PTEs, so we should
	 * not be in this function with `flags & FOLL_COW` set.
	 */
	WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");

	if ((flags & FOLL_WRITE) && !pmd_write(*pmd))
		return NULL;

	if (!(pmd_present(*pmd) && pmd_devmap(*pmd)))
		return NULL;

	if (flags & FOLL_TOUCH)
		touch_pmd(vma, addr, pmd);

	/*
	 * device mapped pages can only be returned if the
	 * caller will manage the page reference count.
	 */
	if (!(flags & FOLL_GET))
		return ERR_PTR(-EEXIST);

	/* Step from the PMD's base pfn to the small page that holds addr. */
	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
	pgmap = get_dev_pagemap(pfn, NULL);
	if (!pgmap)
		return ERR_PTR(-EFAULT);

	page = pfn_to_page(pfn);
	get_page(page);
	put_dev_pagemap(pgmap);

	return page;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 152 79.17% 5 50.00%
andrea arcangeli andrea arcangeli 26 13.54% 1 10.00%
keno fischer keno fischer 10 5.21% 1 10.00%
dominik dingel dominik dingel 2 1.04% 1 10.00%
aneesh kumar aneesh kumar 1 0.52% 1 10.00%
david rientjes david rientjes 1 0.52% 1 10.00%
Total 192 100.00% 10 100.00%



/*
 * Copy a huge PMD from @src_mm to @dst_mm. Only anonymous VMAs are copied;
 * for other VMAs this does nothing and returns 0.
 *
 * The huge zero page is shared by installing a zero PMD in the destination.
 * A regular huge page is shared by taking a page reference, duplicating the
 * rmap and write-protecting the PMD in both mms.
 *
 * Returns 0 on success, -ENOMEM if the destination page table cannot be
 * allocated, or -EAGAIN if the source PMD is no longer a transparent huge
 * PMD once the locks are held.
 */
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *vma)
{
	spinlock_t *dst_ptl, *src_ptl;
	struct page *src_page;
	pgtable_t pgtable;
	pmd_t src_val;
	int ret;

	/* Skip if can be re-fill on fault */
	if (!vma_is_anonymous(vma))
		return 0;

	pgtable = pte_alloc_one(dst_mm, addr);
	if (unlikely(!pgtable))
		return -ENOMEM;

	dst_ptl = pmd_lock(dst_mm, dst_pmd);
	src_ptl = pmd_lockptr(src_mm, src_pmd);
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);

	src_val = *src_pmd;
	if (unlikely(!pmd_trans_huge(src_val))) {
		pte_free(dst_mm, pgtable);
		ret = -EAGAIN;
	} else if (is_huge_zero_pmd(src_val)) {
		/*
		 * When page table lock is held, the huge zero pmd should not
		 * be under splitting since we don't split the page itself,
		 * only pmd to a page table.
		 *
		 * get_huge_zero_page() will never allocate a new page here,
		 * since we already have a zero page to copy. It just takes a
		 * reference.
		 */
		struct page *zero_page = mm_get_huge_zero_page(dst_mm);

		set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
				zero_page);
		ret = 0;
	} else {
		src_page = pmd_page(src_val);
		VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
		get_page(src_page);
		page_dup_rmap(src_page, true);
		add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
		atomic_long_inc(&dst_mm->nr_ptes);
		pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);

		/* Write-protect the source and install an old, read-only copy. */
		pmdp_set_wrprotect(src_mm, addr, src_pmd);
		src_val = pmd_mkold(pmd_wrprotect(src_val));
		set_pmd_at(dst_mm, addr, dst_pmd, src_val);
		ret = 0;
	}

	spin_unlock(src_ptl);
	spin_unlock(dst_ptl);
	return ret;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 257 85.67% 7 46.67%
andrea arcangeli andrea arcangeli 30 10.00% 3 20.00%
andi kleen andi kleen 4 1.33% 2 13.33%
aneesh kumar aneesh kumar 4 1.33% 1 6.67%
aaron lu aaron lu 4 1.33% 1 6.67%
michal hocko michal hocko 1 0.33% 1 6.67%
Total 300 100.00% 15 100.00%



/*
 * Set the accessed bit (and, for a write fault, the dirty bit) on the huge
 * pmd that faulted.
 *
 * @vmf:      fault state; vmf->pmd is the pmd to update, vmf->ptl is set here
 * @orig_pmd: pmd value sampled before the lock was taken
 *
 * Nothing is changed if the pmd no longer equals @orig_pmd once the pmd lock
 * is held.
 */
void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
{
	struct vm_area_struct *vma = vmf->vma;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	pmd_t entry;

	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
	if (likely(pmd_same(*vmf->pmd, orig_pmd))) {
		entry = pmd_mkyoung(orig_pmd);
		if (write)
			entry = pmd_mkdirty(entry);
		/* Only tell the MMU cache when the entry really changed. */
		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, write))
			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
	}
	spin_unlock(vmf->ptl);
}

Contributors
Person Tokens Prop Commits CommitProp
matthew wilcox matthew wilcox 52 38.81% 2 33.33%
kirill a. shutemov kirill a. shutemov 45 33.58% 1 16.67%
minchan kim minchan kim 21 15.67% 1 16.67%
jan kara jan kara 13 9.70% 1 16.67%
dan williams dan williams 3 2.24% 1 16.67%
Total 134 100.00% 6 100.00%



/*
 * Slow path of do_huge_pmd_wp_page(): replace the write-protected huge page
 * @page with HPAGE_PMD_NR individually allocated small pages holding a copy
 * of its contents, mapped by ptes instead of a huge pmd.
 *
 * @vmf:      fault state; vmf->pmd points at the faulting pmd
 * @orig_pmd: pmd value the caller observed; rechecked under the pmd lock
 * @page:     head page of the huge page mapped by @orig_pmd
 *
 * Returns VM_FAULT_WRITE when the small pages were installed, VM_FAULT_OOM
 * when allocating or charging them failed, and 0 when the pmd no longer
 * matched @orig_pmd (the new pages are then released again).
 */
static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
		struct page *page)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	struct mem_cgroup *memcg;
	pgtable_t pgtable;
	pmd_t _pmd;
	int ret = 0, i;
	struct page **pages;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	/* Scratch array holding the replacement small pages. */
	pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
			GFP_KERNEL);
	if (unlikely(!pages)) {
		ret |= VM_FAULT_OOM;
		goto out;
	}

	/*
	 * Allocate every small page on the node of the old huge page and
	 * charge it. The memcg returned by the charge is parked in
	 * page_private() until the charge is committed or cancelled.
	 */
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
					       vmf->address, page_to_nid(page));
		if (unlikely(!pages[i] ||
			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
				     GFP_KERNEL, &memcg, false))) {
			/* Allocated but not charged: just drop it. */
			if (pages[i])
				put_page(pages[i]);
			/* Undo every earlier, fully charged iteration. */
			while (--i >= 0) {
				memcg = (void *)page_private(pages[i]);
				set_page_private(pages[i], 0);
				mem_cgroup_cancel_charge(pages[i], memcg,
						false);
				put_page(pages[i]);
			}
			kfree(pages);
			ret |= VM_FAULT_OOM;
			goto out;
		}
		set_page_private(pages[i], (unsigned long)memcg);
	}

	/* Copy the data subpage by subpage, before the pmd lock is taken. */
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		copy_user_highpage(pages[i], page + i,
				   haddr + PAGE_SIZE * i, vma);
		__SetPageUptodate(pages[i]);
		cond_resched();
	}

	mmun_start = haddr;
	mmun_end   = haddr + HPAGE_PMD_SIZE;
	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);

	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
	/* The pmd changed while we were copying: throw the copies away. */
	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
		goto out_free_pages;
	VM_BUG_ON_PAGE(!PageHead(page), page);

	pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
	/* leave pmd empty until pte is filled */

	/*
	 * Point the on-stack _pmd at the withdrawn page table so the ptes can
	 * be filled in through it while the real pmd is still empty.
	 */
	pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd);
	pmd_populate(vma->vm_mm, &_pmd, pgtable);

	/* Note: haddr is advanced by this loop and is not hpage aligned after it. */
	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
		pte_t entry;
		entry = mk_pte(pages[i], vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		/* Take the memcg back out of page_private() and commit the charge. */
		memcg = (void *)page_private(pages[i]);
		set_page_private(pages[i], 0);
		page_add_new_anon_rmap(pages[i], vmf->vma, haddr, false);
		mem_cgroup_commit_charge(pages[i], memcg, false, false);
		lru_cache_add_active_or_unevictable(pages[i], vma);
		vmf->pte = pte_offset_map(&_pmd, haddr);
		VM_BUG_ON(!pte_none(*vmf->pte));
		set_pte_at(vma->vm_mm, haddr, vmf->pte, entry);
		pte_unmap(vmf->pte);
	}
	kfree(pages);

	smp_wmb(); /* make pte visible before pmd */
	pmd_populate(vma->vm_mm, vmf->pmd, pgtable);
	/* The old huge page is no longer mapped by this pmd. */
	page_remove_rmap(page, true);
	spin_unlock(vmf->ptl);

	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);

	ret |= VM_FAULT_WRITE;
	/*
	 * NOTE(review): presumably the reference that belonged to the pmd
	 * mapping removed above -- confirm against the caller's refcounting.
	 */
	put_page(page);

out:
	return ret;

out_free_pages:
	spin_unlock(vmf->ptl);
	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
	/* Cancel every charge and free every small page we prepared. */
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		memcg = (void *)page_private(pages[i]);
		set_page_private(pages[i], 0);
		mem_cgroup_cancel_charge(pages[i], memcg, false);
		put_page(pages[i]);
	}
	kfree(pages);
	goto out;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 558 77.29% 4 40.00%
dan williams dan williams 70 9.70% 1 10.00%
matthew wilcox matthew wilcox 39 5.40% 2 20.00%
andrea arcangeli andrea arcangeli 36 4.99% 1 10.00%
jan kara jan kara 18 2.49% 1 10.00%
sasha levin sasha levin 1 0.14% 1 10.00%
Total 722 100.00% 10 100.00%



/*
 * Handle a write fault on a write-protected huge pmd.
 *
 * Depending on the state found, this either
 *  - makes the existing pmd writable in place, when the huge page has a
 *    single mapping;
 *  - installs a newly allocated huge page holding a copy of the old one
 *    (or zeroes, when the pmd mapped the huge zero page);
 *  - or, when no huge page can be allocated or charged, splits the pmd or
 *    hands over to do_huge_pmd_wp_page_fallback().
 *
 * @vmf:      fault state; vmf->pmd is the faulting pmd, vmf->ptl is set here
 * @orig_pmd: pmd value sampled by the caller; rechecked under the pmd lock
 *
 * Returns a mask of VM_FAULT_* bits: VM_FAULT_WRITE when a writable mapping
 * was established, VM_FAULT_FALLBACK when the pmd was split instead
 * (combined with VM_FAULT_OOM if the small-page fallback ran out of memory),
 * or 0 when the pmd no longer matched @orig_pmd.
 */
int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
{
	struct vm_area_struct *vma = vmf->vma;
	struct page *page = NULL, *new_page;
	struct mem_cgroup *memcg;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */
	gfp_t huge_gfp;			/* for allocation and charge */
	int ret = 0;

	/* Only look the lock up here; the zero-pmd path below does not take it yet. */
	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
	VM_BUG_ON_VMA(!vma->anon_vma, vma);
	/* No old page to copy or reuse: go straight to allocation (page stays NULL). */
	if (is_huge_zero_pmd(orig_pmd))
		goto alloc;
	spin_lock(vmf->ptl);
	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
		goto out_unlock;

	page = pmd_page(orig_pmd);
	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
	/*
	 * We can only reuse the page if nobody else maps the huge page or
	 * part of it.
	 */
	if (page_trans_huge_mapcount(page, NULL) == 1) {
		pmd_t entry;
		entry = pmd_mkyoung(orig_pmd);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry,  1))
			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
		ret |= VM_FAULT_WRITE;
		goto out_unlock;
	}
	/* Pin the old page: it is read for the copy after the lock is dropped. */
	get_page(page);
	spin_unlock(vmf->ptl);
alloc:
	if (transparent_hugepage_enabled(vma) &&
	    !transparent_hugepage_debug_cow()) {
		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
	} else
		new_page = NULL;

	if (likely(new_page)) {
		prep_transhuge_page(new_page);
	} else {
		if (!page) {
			/* Huge zero pmd and no new huge page: split the pmd. */
			split_huge_pmd(vma, vmf->pmd, vmf->address);
			ret |= VM_FAULT_FALLBACK;
		} else {
			/* Try to copy into small pages; split if even that hits OOM. */
			ret = do_huge_pmd_wp_page_fallback(vmf, orig_pmd, page);
			if (ret & VM_FAULT_OOM) {
				split_huge_pmd(vma, vmf->pmd, vmf->address);
				ret |= VM_FAULT_FALLBACK;
			}
			/* Drop the pin taken before the lock was released. */
			put_page(page);
		}
		count_vm_event(THP_FAULT_FALLBACK);
		goto out;
	}

	/* Charging failed: give the new huge page back and split instead. */
	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
					huge_gfp, &memcg, true))) {
		put_page(new_page);
		split_huge_pmd(vma, vmf->pmd, vmf->address);
		if (page)
			put_page(page);
		ret |= VM_FAULT_FALLBACK;
		count_vm_event(THP_FAULT_FALLBACK);
		goto out;
	}

	count_vm_event(THP_FAULT_ALLOC);

	/* Fill the new page: zeroes for the zero-pmd case, a copy otherwise. */
	if (!page)
		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
	else
		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
	__SetPageUptodate(new_page);

	mmun_start = haddr;
	mmun_end   = haddr + HPAGE_PMD_SIZE;
	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);

	spin_lock(vmf->ptl);
	/* The copy is done and the lock is held again: drop the pin from get_page(). */
	if (page)
		put_page(page);
	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
		/* The pmd changed meanwhile: discard the new page, change nothing. */
		spin_unlock(vmf->ptl);
		mem_cgroup_cancel_charge(new_page, memcg, true);
		put_page(new_page);
		goto out_mn;
	} else {
		pmd_t entry;
		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
		page_add_new_anon_rmap(new_page, vma, haddr, true);
		mem_cgroup_commit_charge(new_page, memcg, false, true);
		lru_cache_add_active_or_unevictable(new_page, vma);
		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
		if (!page) {
			/* Replacing the huge zero page: these anon pages are newly accounted. */
			add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
		} else {
			VM_BUG_ON_PAGE(!PageHead(page), page);
			page_remove_rmap(page, true);
			/*
			 * NOTE(review): presumably the reference that belonged
			 * to the pmd mapping just removed -- confirm.
			 */
			put_page(page);
		}
		ret |= VM_FAULT_WRITE;
	}
	spin_unlock(vmf->ptl);
out_mn:
	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
out:
	return ret;
out_unlock:
	spin_unlock(vmf->ptl);
	return ret;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 494 67.67% 5 38.46%
will deacon will deacon 68 9.32% 1 7.69%
andrea arcangeli andrea arcangeli 64 8.77% 1 7.69%
ebru akagunduz ebru akagunduz 47 6.44% 2 15.38%
jan kara jan kara 28 3.84% 1 7.69%
sagi grimberg sagi grimberg 18 2.47% 1 7.69%
johannes weiner johannes weiner 7 0.96% 1 7.69%
andi kleen andi kleen 4 0.55% 1 7.69%
Total 730 100.00% 13 100.00%

/*
 * Decide whether a huge pmd may be followed for a write access.
 *
 * A writable pmd always qualifies.  FOLL_FORCE can write to even unwritable
 * pmd's, but only after we've gone through a COW cycle (FOLL_COW) and they
 * are dirty.
 */
static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
	if (pmd_write(pmd))
		return true;

	/* Without both FOLL_FORCE and FOLL_COW a read-only pmd is off limits. */
	if (!(flags & FOLL_FORCE) || !(flags & FOLL_COW))
		return false;

	return pmd_dirty(pmd);
}

Contributors
Person Tokens Prop Commits CommitProp
keno fischer keno fischer 39 100.00% 1 100.00%
Total 39 100.00% 1 100.00%



struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
				   unsigned long addr,
				   pmd_t *pmd,
				   unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page = NULL;

	assert_spin_locked(pmd_lockptr(mm, pmd));

	if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
		goto out;

	/* Avoid dumping huge zero page */
	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
		return ERR_PTR(-EFAULT);

	/* Full NUMA hinting faults to serialise migration in fault paths */
	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
		goto out;

	page = pmd_page(*pmd);
	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
	if (flags & FOLL_TOUCH)
		touch_pmd(vma, addr, pmd);
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
                 * We don't mlock() pte-mapped THPs. This way we can avoid
                 * leaking mlocked pages into non-VM_LOCKED VMAs.
                 *
                 * For anon THP:
                 *
                 * In most cases the pmd is the only mapping of the page as we
                 * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
                 * writable private mappings in populate_vma_page_range().
                 *
                 * The only scenario when we have the page shared here is if we
                 * mlocking read-only mapping shared over fork(). We skip
                 * mlocking such pages.
                 *
                 * For file THP:
                 *
                 * We can expect PageDoubleMap() to be stable under page lock:
                 * for file pages we set it in page_add_file_rmap(), which
                 * requires page to be locked.
                 */

		if (PageAnon(page) && compound_mapcount(page) != 1)
			goto skip_mlock;
		if (PageDoubleMap(page) || !page->mapping)
			goto skip_mlock;
		if (!trylock_page(page))
			goto skip_mlock;
		lru_add_drain();
		if (page->mapping && !PageDoubleMap(page))
			mlock_vma_page(page);
		unlock_page(page);
	}
skip_mlock:
	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
	VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
	if (flags & FOLL_GET)
		get_page(page);

out:
	return page;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 204 72.60% 2 18.18%
andrea arcangeli andrea arcangeli 33 11.74% 2 18.18%
ebru akagunduz ebru akagunduz 26 9.25% 4 36.36%
dan williams dan williams 12 4.27% 1 9.09%
bob liu bob liu 3 1.07% 1 9.09%
keno fischer keno fischer 3 1.07% 1 9.09%
Total 281 100.00% 11 100.00%

/* NUMA hinting page fault entry point for trans huge pmds */


/*
 * @vmf: fault state; vmf->pmd is the faulting pmd, vmf->ptl is set here
 * @pmd: pmd value sampled by the caller; rechecked under the pmd lock
 *
 * If the page is considered misplaced by mpol_misplaced(), try to migrate
 * it to the target node; otherwise rebuild the pmd from vma->vm_page_prot
 * so the access can proceed.  Whenever page_nid is still valid on exit the
 * fault is reported through task_numa_fault().
 *
 * Always returns 0.
 */
int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
{
	struct vm_area_struct *vma = vmf->vma;
	struct anon_vma *anon_vma = NULL;
	struct page *page;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	/* page_nid == -1 means "do not call task_numa_fault() at out:". */
	int page_nid = -1, this_nid = numa_node_id();
	int target_nid, last_cpupid = -1;
	bool page_locked;
	bool migrated = false;
	bool was_writable;
	int flags = 0;

	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
	if (unlikely(!pmd_same(pmd, *vmf->pmd)))
		goto out_unlock;

	/*
	 * If there are potential migrations, wait for completion and retry
	 * without disrupting NUMA hinting information. Do not relock and
	 * check_same as the page may no longer be mapped.
	 */
	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
		page = pmd_page(*vmf->pmd);
		spin_unlock(vmf->ptl);
		wait_on_page_locked(page);
		goto out;
	}

	page = pmd_page(pmd);
	BUG_ON(is_huge_zero_page(page));
	page_nid = page_to_nid(page);
	last_cpupid = page_cpupid_last(page);
	count_vm_numa_event(NUMA_HINT_FAULTS);
	if (page_nid == this_nid) {
		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
		flags |= TNF_FAULT_LOCAL;
	}

	/* See similar comment in do_numa_page for explanation */
	if (!pmd_write(pmd))
		flags |= TNF_NO_GROUP;

	/*
	 * Acquire the page lock to serialise THP migrations but avoid dropping
	 * page_table_lock if at all possible
	 */
	page_locked = trylock_page(page);
	target_nid = mpol_misplaced(page, vma, haddr);
	if (target_nid == -1) {
		/* If the page was locked, there are no parallel migrations */
		if (page_locked)
			goto clear_pmdnuma;
	}

	/* Migration could have started since the pmd_trans_migrating check */
	if (!page_locked) {
		spin_unlock(vmf->ptl);
		wait_on_page_locked(page);
		page_nid = -1;
		goto out;
	}

	/*
	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
	 * to serialise splits
	 */
	get_page(page);
	spin_unlock(vmf->ptl);
	anon_vma = page_lock_anon_vma_read(page);

	/* Confirm the PMD did not change while page_table_lock was released */
	spin_lock(vmf->ptl);
	if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
		/* Undo the trylock_page() and get_page() from above. */
		unlock_page(page);
		put_page(page);
		page_nid = -1;
		goto out_unlock;
	}

	/* Bail if we fail to protect against THP splits for any reason */
	if (unlikely(!anon_vma)) {
		/* The page stays locked; clear_pmdnuma unlocks it. */
		put_page(page);
		page_nid = -1;
		goto clear_pmdnuma;
	}

	/*
	 * Migrate the THP to the requested node, returns with page unlocked
	 * and access rights restored.
	 */
	spin_unlock(vmf->ptl);
	migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
				vmf->pmd, pmd, vmf->address, page, target_nid);
	if (migrated) {
		flags |= TNF_MIGRATED;
		page_nid = target_nid;
	} else
		flags |= TNF_MIGRATE_FAIL;

	goto out;
clear_pmdnuma:
	/* Reached with both the pmd lock and the page lock held. */
	BUG_ON(!PageLocked(page));
	/* Rebuild the pmd from the vma protection, keeping the write bit. */
	was_writable = pmd_write(pmd);
	pmd = pmd_modify(pmd, vma->vm_page_prot);
	pmd = pmd_mkyoung(pmd);
	if (was_writable)
		pmd = pmd_mkwrite(pmd);
	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
	unlock_page(page);
out_unlock:
	spin_unlock(vmf->ptl);

out:
	if (anon_vma)
		page_unlock_anon_vma_read(anon_vma);

	if (page_nid != -1)
		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
				vmf->flags);

	return 0;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 446 81.39% 4 26.67%
andrea arcangeli andrea arcangeli 43 7.85% 3 20.00%
ebru akagunduz ebru akagunduz 26 4.74% 3 20.00%
jan kara jan kara 22 4.01% 1 6.67%
rik van riel rik van riel 4 0.73% 1 6.67%
andi kleen andi kleen 3 0.55% 1 6.67%
david rientjes david rientjes 2 0.36% 1 6.67%
bob liu bob liu 2 0.36% 1 6.67%
Total 548 100.00% 15 100.00%

/*
 * Apply MADV_FREE to the part [@addr, @next) of the huge pmd @pmd.
 *
 * @tlb:  mmu_gather of the ongoing operation; tlb->mm is the address space
 * @vma:  vma the pmd belongs to
 * @pmd:  pmd to operate on
 * @addr: start of the range
 * @next: end of the range
 *
 * When the range spans the whole pmd, the page's dirty flag is cleared, an
 * active page is deactivated and a young or dirty pmd is rewritten as old
 * and clean.  When the range covers only part of the pmd, the huge page is
 * split so that MADV_FREE can deal with the small pages, and false is
 * returned.
 *
 * Return true if we do MADV_FREE successfully on entire pmd page.
 * Otherwise, return false.
 */
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		pmd_t *pmd, unsigned long addr, unsigned long next)
{
	spinlock_t *ptl;
	pmd_t orig_pmd;
	struct page *page;
	struct mm_struct *mm = tlb->mm;
	bool ret = false;

	tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		goto out_unlocked;

	orig_pmd = *pmd;
	if (is_huge_zero_pmd(orig_pmd))
		goto out;

	page = pmd_page(orig_pmd);
	/*
	 * If other processes are mapping this page, we couldn't discard
	 * the page unless they all do MADV_FREE so let's skip the page.
	 */
	if (page_mapcount(page) != 1)
		goto out;

	if (!trylock_page(page))
		goto out;

	/*
	 * If user want to discard part-pages of THP, split it so MADV_FREE
	 * will deactivate only them.
	 */
	if (next - addr != HPAGE_PMD_SIZE) {
		/* Pin the page: the pmd lock is dropped for the split. */
		get_page(page);
		spin_unlock(ptl);
		split_huge_page(page);
		/*
		 * Unlock before dropping our reference.  With the pmd lock
		 * released, our pin is the only thing in this function that
		 * keeps the page alive, so the page must not be touched
		 * after put_page().
		 */
		unlock_page(page);
		put_page(page);
		goto out_unlocked;
	}

	if (PageDirty(page))
		ClearPageDirty(page);
	unlock_page(page);

	if (PageActive(page))
		deactivate_page(page);

	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
		/* Rewrite the pmd as old and clean and queue its TLB entry for flushing. */
		orig_pmd = pmdp_huge_get_and_clear_full(mm, addr, pmd,
			tlb->fullmm);
		orig_pmd = pmd_mkold(orig_pmd);
		orig_pmd = pmd_mkclean(orig_pmd);

		set_pmd_at(mm, addr, pmd, orig_pmd);
		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
	}
	ret = true;
out:
	spin_unlock(ptl);
out_unlocked:
	return ret;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 195 71.69% 2 22.22%
andrea arcangeli andrea arcangeli 58 21.32% 3 33.33%
aneesh kumar aneesh kumar 7 2.57% 1 11.11%
ebru akagunduz ebru akagunduz 6 2.21% 1 11.11%
huang ying huang ying 4 1.47% 1 11.11%
bob liu bob liu 2 0.74% 1 11.11%
Total 272 100.00% 9 100.00%



/*
 * Withdraw the page table deposited for @pmd, free it and drop it from the
 * mm's nr_ptes count.
 */
static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
{
	pte_free(mm, pgtable_trans_huge_withdraw(mm, pmd));
	atomic_long_dec(&mm->nr_ptes);
}

Contributors
Person Tokens Prop Commits CommitProp
aneesh kumar aneesh kumar 43 100.00% 1 100.00%
Total 43 100.00% 1 100.00%



/*
 * Tear down the huge pmd at @addr.
 *
 * Returns 0 if @pmd could not be locked as a huge pmd (nothing was done),
 * 1 once the pmd has been cleared and its page handed to @tlb.
 */
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		 pmd_t *pmd, unsigned long addr)
{
	struct mm_struct *mm = tlb->mm;
	struct page *page;
	spinlock_t *ptl;
	pmd_t old;

	tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);

	ptl = __pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return 0;

	/*
	 * Architectures like ppc64 look at the deposited pgtable when
	 * pmdp_huge_get_and_clear is called, so the deposited table is only
	 * withdrawn once all pmdp operations are finished.
	 */
	old = pmdp_huge_get_and_clear_full(mm, addr, pmd, tlb->fullmm);
	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);

	if (vma_is_dax(vma)) {
		spin_unlock(ptl);
		if (is_huge_zero_pmd(old))
			tlb_remove_page_size(tlb, pmd_page(old), HPAGE_PMD_SIZE);
		return 1;
	}

	if (is_huge_zero_pmd(old)) {
		zap_deposited_table(mm, pmd);
		spin_unlock(ptl);
		tlb_remove_page_size(tlb, pmd_page(old), HPAGE_PMD_SIZE);
		return 1;
	}

	page = pmd_page(old);
	page_remove_rmap(page, true);
	VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
	VM_BUG_ON_PAGE(!PageHead(page), page);

	if (!PageAnon(page)) {
		/* File pages only have a deposited table where the arch needs one. */
		if (arch_needs_pgtable_deposit())
			zap_deposited_table(mm, pmd);
		add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
	} else {
		zap_deposited_table(mm, pmd);
		add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);
	}

	spin_unlock(ptl);
	tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
	return 1;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 239 77.10% 2 16.67%
andrea arcangeli andrea arcangeli 32 10.32% 3 25.00%
aneesh kumar aneesh kumar 27 8.71% 3 25.00%
xiao guangrong xiao guangrong 12 3.87% 4 33.33%
Total 310 100.00% 12 100.00%

#ifndef pmd_move_must_withdraw


/*
 * Tell move_huge_pmd() whether the preallocated PTE page table has to be
 * moved along with the huge pmd.  Returns non-zero if so.
 */
static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
					 spinlock_t *old_pmd_ptl,
					 struct vm_area_struct *vma)
{
	/*
	 * With split pmd locks, a different lock means new_pmd sits on a
	 * different PMD page table, and only then does the table have to
	 * move.
	 */
	if (new_pmd_ptl == old_pmd_ptl)
		return 0;

	/* We also don't deposit and withdraw tables for file pages. */
	return vma_is_anonymous(vma);
}
Contributors
Person Tokens Prop Commits CommitProp
aneesh kumar aneesh kumar 33 100.00% 1 100.00%
Total 33 100.00% 1 100.00%

#endif



/*
 * Move a whole huge pmd from @old_addr (@old_pmd) to @new_addr (@new_pmd).
 *
 * Returns false without changing anything when either address is not hpage
 * aligned, when fewer than HPAGE_PMD_SIZE bytes remain before @old_end,
 * when the destination pmd is not empty, or when @old_pmd cannot be locked
 * as a huge pmd.  Returns true after the pmd was moved; in that case the old
 * range was either flushed from the TLB here (the moved pmd was present and
 * dirty) or *@need_flush has been set to true.
 */
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *src_ptl, *dst_ptl;
	bool flush_now = false;
	bool two_locks;
	pmd_t moved;

	if (old_addr & ~HPAGE_PMD_MASK)
		return false;
	if (new_addr & ~HPAGE_PMD_MASK)
		return false;
	if (old_end - old_addr < HPAGE_PMD_SIZE)
		return false;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON(!pmd_none(*new_pmd))) {
		VM_BUG_ON(pmd_trans_huge(*new_pmd));
		return false;
	}

	/*
	 * The order in which the source and destination ptlocks are taken
	 * does not matter: exclusive mmap_sem prevents deadlock.
	 */
	src_ptl = __pmd_trans_huge_lock(old_pmd, vma);
	if (!src_ptl)
		return false;

	dst_ptl = pmd_lockptr(mm, new_pmd);
	two_locks = dst_ptl != src_ptl;
	if (two_locks)
		spin_lock_nested(dst_ptl, SINGLE_DEPTH_NESTING);

	moved = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
	if (pmd_present(moved) && pmd_dirty(moved))
		flush_now = true;
	VM_BUG_ON(!pmd_none(*new_pmd));

	if (pmd_move_must_withdraw(dst_ptl, src_ptl, vma)) {
		pgtable_t pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);

		pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
	}
	set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(moved));
	if (two_locks)
		spin_unlock(dst_ptl);

	/* Flush while the source lock is still held, or leave it to the caller. */
	if (flush_now)
		flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	else
		*need_flush = true;
	spin_unlock(src_ptl);

	return true;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 215 77.90% 3 33.33%
aaron lu aaron lu 57 20.65% 2 22.22%
andrea arcangeli andrea arcangeli 2 0.72% 2 22.22%
xiao guangrong xiao guangrong 1 0.36% 1 11.11%
aneesh kumar aneesh kumar 1 0.36% 1 11.11%
Total 276 100.00% 9 100.00%

/*
 * Change the protection of the huge pmd at @addr to @newprot.
 *
 * Returns
 *  - 0 if PMD could not be locked
 *  - 1 if PMD was locked but protections are unchanged and a TLB flush is
 *    unnecessary
 *  - HPAGE_PMD_NR if protections changed and a TLB flush is necessary
 */
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, pgprot_t newprot, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	bool preserve_write;
	spinlock_t *ptl;
	pmd_t entry;
	int ret;

	ptl = __pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return 0;

	preserve_write = prot_numa && pmd_write(*pmd);
	ret = 1;

	/*
	 * Avoid trapping faults against the zero page. The read-only
	 * data is likely to be read-cached on the local CPU and
	 * local/remote hits to the zero page are not interesting.
	 */
	if (prot_numa && is_huge_zero_pmd(*pmd))
		goto unlock;

	/* A prot_numa request on a pmd that is already protnone changes nothing. */
	if (prot_numa && pmd_protnone(*pmd))
		goto unlock;

	entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
	entry = pmd_modify(entry, newprot);
	if (preserve_write)
		entry = pmd_mkwrite(entry);
	ret = HPAGE_PMD_NR;
	set_pmd_at(mm, addr, pmd, entry);
	BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
			pmd_write(entry));

unlock:
	spin_unlock(ptl);
	return ret;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 150 83.33% 4 57.14%
andrea arcangeli andrea arcangeli 19 10.56% 1 14.29%
david rientjes david rientjes 7 3.89% 1 14.29%
matthew wilcox matthew wilcox 4 2.22% 1 14.29%
Total 180 100.00% 7 100.00%

/*
 * Lock @pmd and check that it maps a thp (or is a devmap pmd).
 *
 * Returns the page table lock pointer if so, NULL otherwise.
 *
 * Note that when a lock pointer is returned the lock is still held, so the
 * caller must unlock it.  When NULL is returned the lock has already been
 * released again.
 */
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
{
	spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);

	if (unlikely(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd))) {
		spin_unlock(ptl);
		return NULL;
	}

	return ptl;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 51 87.93% 3 60.00%
andrea arcangeli andrea arcangeli 6 10.34% 1 20.00%
matthew wilcox matthew wilcox 1 1.72% 1 20.00%
Total 58 100.00% 5 100.00%



/*
 * Replace a huge zero page pmd by a page table whose HPAGE_PMD_NR ptes all
 * map the (small) zero page as special entries.
 */
static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
		unsigned long haddr, pmd_t *pmd)
{
	struct mm_struct *mm = vma->vm_mm;
	pgtable_t pgtable;
	pmd_t tmp_pmd;
	int idx;

	/* leave pmd empty until pte is filled */
	pmdp_huge_clear_flush_notify(vma, haddr, pmd);

	/* Fill the withdrawn table through a local pmd first. */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	pmd_populate(mm, &tmp_pmd, pgtable);

	for (idx = 0; idx < HPAGE_PMD_NR; idx++) {
		unsigned long addr = haddr + idx * PAGE_SIZE;
		pte_t zero_pte = pte_mkspecial(pfn_pte(my_zero_pfn(addr),
						       vma->vm_page_prot));
		pte_t *ptep = pte_offset_map(&tmp_pmd, addr);

		VM_BUG_ON(!pte_none(*ptep));
		set_pte_at(mm, addr, ptep, zero_pte);
		pte_unmap(ptep);
	}

	smp_wmb(); /* make pte visible before pmd */
	pmd_populate(mm, pmd, pgtable);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 161 100.00% 1 100.00%
Total 161 100.00% 1 100.00%



/*
 * Split the huge pmd at @haddr into a page table of HPAGE_PMD_NR ptes.
 * Called by __split_huge_pmd() with the pmd lock held.
 *
 * @vma:    vma covering [@haddr, @haddr + HPAGE_PMD_SIZE)
 * @pmd:    huge (or devmap) pmd to split
 * @haddr:  hpage aligned start address of the pmd
 * @freeze: install migration entries instead of present ptes, then drop
 *          the rmap and the page reference of every subpage
 *
 * For non-anonymous vmas the pmd is simply cleared and the page unmapped.
 * A huge zero page pmd is handed to __split_huge_zero_page_pmd().
 */
static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long haddr, bool freeze)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	pgtable_t pgtable;
	pmd_t _pmd;
	bool young, write, dirty, soft_dirty;
	unsigned long addr;
	int i;

	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
	VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
	VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd));

	count_vm_event(THP_SPLIT_PMD);

	if (!vma_is_anonymous(vma)) {
		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
		/*
		 * We are going to unmap this huge page. So
		 * just go ahead and zap it
		 */
		if (arch_needs_pgtable_deposit())
			zap_deposited_table(mm, pmd);
		/* For DAX nothing more is done once the pmd is cleared. */
		if (vma_is_dax(vma))
			return;
		page = pmd_page(_pmd);
		/* Carry the young bit of the cleared pmd over to the page. */
		if (!PageReferenced(page) && pmd_young(_pmd))
			SetPageReferenced(page);
		page_remove_rmap(page, true);
		put_page(page);
		add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
		return;
	} else if (is_huge_zero_pmd(*pmd)) {
		return __split_huge_zero_page_pmd(vma, haddr, pmd);
	}

	page = pmd_page(*pmd);
	VM_BUG_ON_PAGE(!page_count(page), page);
	/*
	 * Add HPAGE_PMD_NR - 1 references on top of the existing ones.
	 * NOTE(review): presumably so that each of the HPAGE_PMD_NR ptes
	 * created below ends up with its own reference -- confirm.
	 */
	page_ref_add(page, HPAGE_PMD_NR - 1);
	/* Sample the pmd bits that have to be carried over to the ptes. */
	write = pmd_write(*pmd);
	young = pmd_young(*pmd);
	dirty = pmd_dirty(*pmd);
	soft_dirty = pmd_soft_dirty(*pmd);

	pmdp_huge_split_prepare(vma, haddr, pmd);
	/* Fill the withdrawn page table through the on-stack _pmd. */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	pmd_populate(mm, &_pmd, pgtable);

	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
		pte_t entry, *pte;
		/*
		 * Note that NUMA hinting access restrictions are not
		 * transferred to avoid any possibility of altering
		 * permissions across VMAs.
		 */
		if (freeze) {
			swp_entry_t swp_entry;
			swp_entry = make_migration_entry(page + i, write);
			entry = swp_entry_to_pte(swp_entry);
			if (soft_dirty)
				entry = pte_swp_mksoft_dirty(entry);
		} else {
			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
			entry = maybe_mkwrite(entry, vma);
			if (!write)
				entry = pte_wrprotect(entry);
			if (!young)
				entry = pte_mkold(entry);
			if (soft_dirty)
				entry = pte_mksoft_dirty(entry);
		}
		/* A dirty pmd makes every subpage dirty. */
		if (dirty)
			SetPageDirty(page + i);
		pte = pte_offset_map(&_pmd, addr);
		BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, entry);
		/* Count the new pte mapping of this subpage. */
		atomic_inc(&page[i]._mapcount);
		pte_unmap(pte);
	}

	/*
	 * Set PG_double_map before dropping compound_mapcount to avoid
	 * false-negative page_mapped().
	 */
	if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
		for (i = 0; i < HPAGE_PMD_NR; i++)
			atomic_inc(&page[i]._mapcount);
	}

	/* Drop the compound mapcount contributed by the pmd being split. */
	if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
		/* Last compound_mapcount is gone. */
		__dec_node_page_state(page, NR_ANON_THPS);
		if (TestClearPageDoubleMap(page)) {
			/* No need in mapcount reference anymore */
			for (i = 0; i < HPAGE_PMD_NR; i++)
				atomic_dec(&page[i]._mapcount);
		}
	}

	smp_wmb(); /* make pte visible before pmd */
	/*
	 * Up to this point the pmd is present and huge and userland has the
	 * whole access to the hugepage during the split (which happens in
	 * place). If we overwrite the pmd with the not-huge version pointing
	 * to the pte here (which of course we could if all CPUs were bug
	 * free), userland could trigger a small page size TLB miss on the
	 * small sized TLB while the hugepage TLB entry is still established in
	 * the huge TLB. Some CPUs don't like that.
	 * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
	 * 383 on page 93. Intel should be safe but it also warns that it's
	 * only safe if the permission and cache attributes of the two entries
	 * loaded in the two TLBs are identical (which should be the case here).
	 * But it is generally safer to never allow small and huge TLB entries
	 * for the same virtual address to be loaded simultaneously. So instead
	 * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
	 * current pmd notpresent (atomically because here the pmd_trans_huge
	 * and pmd_trans_splitting must remain set at all times on the pmd
	 * until the split is complete for this pmd), then we flush the SMP TLB
	 * and finally we write the non-huge version of the pmd entry with
	 * pmd_populate.
	 */
	pmdp_invalidate(vma, haddr, pmd);
	pmd_populate(mm, pmd, pgtable);

	if (freeze) {
		/*
		 * The ptes are migration entries, not mappings: undo the
		 * per-subpage rmap and reference taken above.
		 */
		for (i = 0; i < HPAGE_PMD_NR; i++) {
			page_remove_rmap(page + i, false);
			put_page(page + i);
		}
	}
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 570 86.49% 6 42.86%
andrea arcangeli andrea arcangeli 35 5.31% 2 14.29%
aneesh kumar aneesh kumar 22 3.34% 2 14.29%
minchan kim minchan kim 21 3.19% 1 7.14%
dan williams dan williams 7 1.06% 1 7.14%
joonsoo kim joonsoo kim 3 0.46% 1 7.14%
mel gorman mel gorman 1 0.15% 1 7.14%
Total 659 100.00% 14 100.00%



/*
 * Split the huge (or devmap) pmd covering @address, if there still is one.
 *
 * @freeze: set up migration entries; requires @page
 * @page:   when non-NULL, only split if the pmd still maps this page
 *
 * The split runs under the pmd lock and inside an mmu notifier
 * invalidate_range_start/end pair covering the whole huge pmd range.
 */
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct page *page)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long haddr = address & HPAGE_PMD_MASK;
	unsigned long range_end = haddr + HPAGE_PMD_SIZE;
	spinlock_t *ptl;
	bool is_thp;

	mmu_notifier_invalidate_range_start(mm, haddr, range_end);
	ptl = pmd_lock(mm, pmd);

	/*
	 * A caller asking for migration entries has to supply the page to
	 * check the pmd against; otherwise the wrong page could get
	 * replaced.
	 */
	VM_BUG_ON(freeze && !page);
	if (page && page != pmd_page(*pmd))
		goto unlock;

	is_thp = pmd_trans_huge(*pmd);
	if (!is_thp && !pmd_devmap(*pmd))
		goto unlock;

	if (is_thp) {
		page = pmd_page(*pmd);
		if (PageMlocked(page))
			clear_page_mlock(page);
	}
	__split_huge_pmd_locked(vma, pmd, haddr, freeze);

unlock:
	spin_unlock(ptl);
	mmu_notifier_invalidate_range_end(mm, haddr, range_end);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 107 65.64% 4 66.67%
naoya horiguchi naoya horiguchi 31 19.02% 1 16.67%
dan williams dan williams 25 15.34% 1 16.67%
Total 163 100.00% 6 100.00%



/*
 * Walk the page tables of @vma's mm down to the pmd covering @address and
 * pass it to __split_huge_pmd().  Does nothing if the pgd or pud entry for
 * @address is not present.
 */
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
		bool freeze, struct page *page)
{
	pgd_t *pgdp = pgd_offset(vma->vm_mm, address);
	pud_t *pudp;

	if (!pgd_present(*pgdp))
		return;

	pudp = pud_offset(pgdp, address);
	if (!pud_present(*pudp))
		return;

	__split_huge_pmd(vma, pmd_offset(pudp, address), address, freeze, page);
}

Contributors
Person Tokens Prop Commits CommitProp
hugh dickins hugh dickins 45 46.88% 1 14.29%
andrea arcangeli andrea arcangeli 25 26.04% 1 14.29%
kirill a. shutemov kirill a. shutemov 24 25.00% 4 57.14%
naoya horiguchi naoya horiguchi 2 2.08% 1 14.29%
Total 96 100.00% 7 100.00%



/*
 * Split the huge PMD around @address when @address is not huge-page aligned
 * and the huge-page-sized, huge-page-aligned range containing it lies
 * entirely inside @vma (i.e. the range could have held a huge page).
 */
static void split_huge_pmd_if_needed(struct vm_area_struct *vma,
				     unsigned long address)
{
	unsigned long haddr = address & HPAGE_PMD_MASK;

	if (!(address & ~HPAGE_PMD_MASK))
		return;
	if (haddr < vma->vm_start)
		return;
	if (haddr + HPAGE_PMD_SIZE > vma->vm_end)
		return;

	split_huge_pmd_address(vma, address, false, NULL);
}

/*
 * Called with the new boundaries of @vma: split any huge PMD that the new
 * @start or @end would cut through.
 *
 * When @adjust_next is positive, the start of vma->vm_next is also moving,
 * by @adjust_next pages, and the same check is applied to that new start
 * against the next vma.
 */
void vma_adjust_trans_huge(struct vm_area_struct *vma,
			     unsigned long start,
			     unsigned long end,
			     long adjust_next)
{
	struct vm_area_struct *next;
	unsigned long next_start;

	split_huge_pmd_if_needed(vma, start);
	split_huge_pmd_if_needed(vma, end);

	if (adjust_next <= 0)
		return;

	next = vma->vm_next;
	next_start = next->vm_start;
	next_start += adjust_next << PAGE_SHIFT;
	split_huge_pmd_if_needed(next, next_start);
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 149 85.14% 1 20.00%
kirill a. shutemov kirill a. shutemov 26 14.86% 4 80.00%
Total 175 100.00% 5 100.00%



/*
 * Unmap all HPAGE_PMD_NR subpages of the compound page whose head is @page.
 *
 * Anonymous pages are unmapped with TTU_MIGRATION. The loop stops early once
 * page_count() of the head has dropped to 1. A failing try_to_unmap() trips
 * VM_BUG_ON_PAGE() on the subpage that failed.
 */
static void freeze_page(struct page *page)
{
	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
		TTU_RMAP_LOCKED;
	struct page *tried = page;
	int nr, err;

	VM_BUG_ON_PAGE(!PageHead(page), page);

	if (PageAnon(page))
		ttu_flags |= TTU_MIGRATION;

	/* Splitting the huge PMD is only needed on the first call. */
	err = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);

	for (nr = 1; nr < HPAGE_PMD_NR && !err; nr++) {
		/* Nothing maps the page any more: we are done. */
		if (page_count(page) == 1)
			return;

		tried = page + nr;
		err = try_to_unmap(tried, ttu_flags);
	}

	VM_BUG_ON_PAGE(err, tried);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 111 100.00% 3 100.00%
Total 111 100.00% 3 100.00%



static void unfreeze_page(struct page *page)
{
	int i;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		remove_migration_ptes(page + i, page + i, true);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 40 100.00% 2 100.00%
Total 40 100.00% 2 100.00%



/*
 * Turn tail page number @tail of the compound page @head into a page of its
 * own: give it its initial references, copy a selected set of flags from the
 * head, clear its compound-head link, set up ->mapping and ->index, and hand
 * it to lru_add_page_tail() together with @lruvec and @list.
 */
static void __split_huge_page_tail(struct page *head, int tail,
		struct lruvec *lruvec, struct list_head *list)
{
	struct page *page_tail = head + tail;

	/* The tail must be unmapped and must not hold any reference yet. */
	VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
	VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);

	/*
	 * tail_page->_refcount is zero and not changing from under us. But
	 * get_page_unless_zero() may be running from under us on the
	 * tail_page. If we used atomic_set() below instead of atomic_inc() or
	 * atomic_add(), we would then run atomic_set() concurrently with
	 * get_page_unless_zero(), and atomic_set() is implemented in C not
	 * using locked ops. spin_unlock on x86 sometime uses locked ops
	 * because of PPro errata 66, 92, so unless somebody can guarantee
	 * atomic_set() here would be safe on all archs (and not only on x86),
	 * it's safer to use atomic_inc()/atomic_add().
	 */
	if (PageAnon(head)) {
		page_ref_inc(page_tail);
	} else {
		/* Additional pin to radix tree */
		page_ref_add(page_tail, 2);
	}

	/* Drop the flags in PAGE_FLAGS_CHECK_AT_PREP, then inherit from head. */
	page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
	page_tail->flags |= (head->flags &
			((1L << PG_referenced) |
			 (1L << PG_swapbacked) |
			 (1L << PG_mlocked) |
			 (1L << PG_uptodate) |
			 (1L << PG_active) |
			 (1L << PG_locked) |
			 (1L << PG_unevictable) |
			 (1L << PG_dirty)));

	/*
	 * After clearing PageTail the gup refcount can be released.
	 * Page flags also must be visible before we make the page non-compound.
	 */
	smp_wmb();

	clear_compound_head(page_tail);

	/* Carry the young/idle state of the head over to the tail. */
	if (page_is_young(head))
		set_page_young(page_tail);
	if (page_is_idle(head))
		set_page_idle(page_tail);

	/* ->mapping in first tail page is compound_mapcount */
	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
			page_tail);
	page_tail->mapping = head->mapping;

	/* The tail sits @tail pages after the head within the mapping. */
	page_tail->index = head->index + tail;
	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
	lru_add_page_tail(head, page_tail, lruvec, list);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 237 97.93% 4 66.67%
minchan kim minchan kim 3 1.24% 1 16.67%
joonsoo kim joonsoo kim 2 0.83% 1 16.67%
Total 242 100.00% 6 100.00%



/*
 * Do the actual split of the compound page that @page belongs to.
 *
 * This function releases locks it does not take itself: the zone LRU lock
 * (restoring the IRQ state saved in @flags) and, for non-anonymous pages,
 * the mapping's tree_lock. In this file split_huge_page_to_list() acquires
 * both before calling here.
 *
 * @page:  any subpage of the huge page; it is the one subpage that is
 *         neither unlocked nor released at the end.
 * @list:  passed through to __split_huge_page_tail() for every tail page.
 * @flags: IRQ flags to restore when dropping the zone LRU lock.
 */
static void __split_huge_page(struct page *page, struct list_head *list,
		unsigned long flags)
{
	struct page *head = compound_head(page);
	struct zone *zone = page_zone(head);
	struct lruvec *lruvec;
	/*
	 * Only replaced by a real limit for non-anonymous pages.
	 * NOTE(review): presumably pgoff_t is unsigned, so -1 acts as "no
	 * limit" for anonymous pages -- confirm.
	 */
	pgoff_t end = -1;
	int i;

	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);

	/* complete memcg works before add pages to LRU */
	mem_cgroup_split_huge_fixup(head);

	/* For file pages: number of pages needed to cover i_size. */
	if (!PageAnon(page))
		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);

	/* Detach the tail pages, last one first; the head is handled below. */
	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
		__split_huge_page_tail(head, i, lruvec, list);
		/* Some pages can be beyond i_size: drop them from page cache */
		if (head[i].index >= end) {
			__ClearPageDirty(head + i);
			__delete_from_page_cache(head + i, NULL);
			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
				shmem_uncharge(head->mapping->host, 1);
			put_page(head + i);
		}
	}

	ClearPageCompound(head);
	/* See comment in __split_huge_page_tail() */
	if (PageAnon(head)) {
		page_ref_inc(head);
	} else {
		/* Additional pin to radix tree */
		page_ref_add(head, 2);
		spin_unlock(&head->mapping->tree_lock);
	}

	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);

	unfreeze_page(head);

	/* Unlock and release every subpage except the one the caller named. */
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		struct page *subpage = head + i;
		if (subpage == page)
			continue;
		unlock_page(subpage);

		/*
		 * Subpages may be freed if there wasn't any mapping
		 * like if add_to_swap() is running on a lru page that
		 * had its mapping zapped. And freeing these pages
		 * requires taking the lru_lock so we do the put_page
		 * of the tail pages after the split is complete.
		 */
		put_page(subpage);
	}
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 279 98.24% 3 60.00%
mel gorman mel gorman 5 1.76% 2 40.00%
Total 284 100.00% 5 100.00%



/*
 * Return the total number of mappings of @page, which must not be a tail
 * page.
 *
 * For a non-compound page this is simply _mapcount + 1. For a hugetlb page
 * it is the compound mapcount. Otherwise the compound mapcount and the
 * per-subpage mapcounts of all HPAGE_PMD_NR subpages are summed and then
 * corrected for contributions that would be counted twice.
 */
int total_mapcount(struct page *page)
{
	int huge_maps, total, idx;

	VM_BUG_ON_PAGE(PageTail(page), page);

	if (likely(!PageCompound(page)))
		return atomic_read(&page->_mapcount) + 1;

	huge_maps = compound_mapcount(page);
	if (PageHuge(page))
		return huge_maps;

	total = huge_maps;
	for (idx = 0; idx < HPAGE_PMD_NR; idx++)
		total += atomic_read(&page[idx]._mapcount) + 1;

	if (!PageAnon(page)) {
		/* File pages have compound_mapcount included in _mapcount. */
		total -= huge_maps * HPAGE_PMD_NR;
	} else if (PageDoubleMap(page)) {
		total -= HPAGE_PMD_NR;
	}

	return total;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 128 100.00% 2 100.00%
Total 128 100.00% 2 100.00%

/*
 * This calculates accurately how many mappings a transparent hugepage
 * has (unlike page_mapcount() which isn't fully accurate). This full
 * accuracy is primarily needed to know if copy-on-write faults can
 * reuse the page and change the mapping to read-write instead of
 * copying them. At the same time this returns the total_mapcount too.
 *
 * The function returns the highest mapcount any one of the subpages
 * has. If the return value is one, even if different processes are
 * mapping different subpages of the transparent hugepage, they can
 * all reuse it, because each process is reusing a different subpage.
 *
 * The total_mapcount is instead counting all virtual mappings of the
 * subpages. If the total_mapcount is equal to "one", it tells the
 * caller all mappings belong to the same "mm" and in turn the
 * anon_vma of the transparent hugepage can become the vma->anon_vma
 * local one as no other process may be mapping any of the subpages.
 *
 * It would be more accurate to replace page_mapcount() with
 * page_trans_huge_mapcount(), however we only use
 * page_trans_huge_mapcount() in the copy-on-write faults where we
 * need full accuracy to avoid breaking page pinning, because
 * page_trans_huge_mapcount() is slower than page_mapcount().
 */


/*
 * Return the highest mapcount found on any subpage of the transparent huge
 * page that @page belongs to, with the compound mapcount added in. If
 * @total_mapcount is non-NULL, the sum over all mappings is stored there.
 *
 * For a page that is not part of a transparent compound page both values
 * are simply _mapcount + 1. Must not be called on hugetlbfs pages.
 */
int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
{
	int idx, sub_maps, huge_maps;
	int highest = 0;
	int sum = 0;

	/* hugetlbfs shouldn't call it */
	VM_BUG_ON_PAGE(PageHuge(page), page);

	if (likely(!PageTransCompound(page))) {
		sub_maps = atomic_read(&page->_mapcount) + 1;
		if (total_mapcount)
			*total_mapcount = sub_maps;
		return sub_maps;
	}

	page = compound_head(page);

	for (idx = 0; idx < HPAGE_PMD_NR; idx++) {
		sub_maps = atomic_read(&page[idx]._mapcount) + 1;
		if (sub_maps > highest)
			highest = sub_maps;
		sum += sub_maps;
	}

	/* With PageDoubleMap set, take one count back off every subpage. */
	if (PageDoubleMap(page)) {
		highest--;
		sum -= HPAGE_PMD_NR;
	}

	huge_maps = compound_mapcount(page);
	if (total_mapcount)
		*total_mapcount = sum + huge_maps;

	return highest + huge_maps;
}

Contributors
Person Tokens Prop Commits CommitProp
andrea arcangeli andrea arcangeli 171 100.00% 1 100.00%
Total 171 100.00% 1 100.00%

/*
 * This function splits huge page into normal pages. @page can point to any
 * subpage of huge page to split. Split doesn't change the position of @page.
 *
 * Only the caller may hold a pin on @page, otherwise split fails with -EBUSY.
 * The huge page must be locked.
 *
 * If @list is null, tail pages will be added to LRU list, otherwise, to @list.
 *
 * Both head page and tail pages will inherit mapping, flags, and so on from
 * the hugepage.
 *
 * The GUP pin and PG_locked are transferred to @page. The remaining subpages
 * can be freed if they are not mapped.
 *
 * Returns 0 if the hugepage is split successfully.
 * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
 * us.
 */


/*
 * Outline: take the anon_vma write lock (anonymous pages) or the i_mmap read
 * lock (pages with a mapping), do a racy early check of the reference
 * count, unmap the page with freeze_page(), then take the zone LRU lock
 * (plus the mapping's tree_lock for file pages) and the split queue lock and
 * try to freeze the refcount. On success __split_huge_page() does the split
 * and drops the LRU/tree locks; on failure the locks are dropped here and
 * the page is remapped with unfreeze_page().
 */
int split_huge_page_to_list(struct page *page, struct list_head *list)
{
	struct page *head = compound_head(page);
	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
	struct anon_vma *anon_vma = NULL;
	struct address_space *mapping = NULL;
	int count, mapcount, extra_pins, ret;
	bool mlocked;
	unsigned long flags;

	VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
	VM_BUG_ON_PAGE(!PageCompound(page), page);

	if (PageAnon(head)) {
		/*
		 * The caller does not necessarily hold an mmap_sem that would
		 * prevent the anon_vma disappearing so we first take a
		 * reference to it and then lock the anon_vma for write. This
		 * is similar to page_lock_anon_vma_read except the write lock
		 * is taken to serialise against parallel split or collapse
		 * operations.
		 */
		anon_vma = page_get_anon_vma(head);
		if (!anon_vma) {
			ret = -EBUSY;
			goto out;
		}
		extra_pins = 0;
		mapping = NULL;
		anon_vma_lock_write(anon_vma);
	} else {
		mapping = head->mapping;

		/* Truncated ? */
		if (!mapping) {
			ret = -EBUSY;
			goto out;
		}

		/* Additional pins from radix tree */
		extra_pins = HPAGE_PMD_NR;
		anon_vma = NULL;
		i_mmap_lock_read(mapping);
	}

	/*
	 * Racy check if we can split the page, before freeze_page() will
	 * split PMDs
	 */
	if (total_mapcount(head) != page_count(head) - extra_pins - 1) {
		ret = -EBUSY;
		goto out_unlock;
	}

	/* Sample the mlocked state before freeze_page() unmaps the page. */
	mlocked = PageMlocked(page);
	freeze_page(head);
	VM_BUG_ON_PAGE(compound_mapcount(head), head);

	/* Make sure the page is not on per-CPU pagevec as it takes pin */
	if (mlocked)
		lru_add_drain();

	/* prevent PageLRU to go away from under us, and freeze lru stats */
	spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);

	if (mapping) {
		void **pslot;

		spin_lock(&mapping->tree_lock);
		pslot = radix_tree_lookup_slot(&mapping->page_tree,
				page_index(head));
		/*
		 * Check if the head page is present in radix tree.
		 * We assume all tail are present too, if head is there.
		 */
		if (radix_tree_deref_slot_protected(pslot,
					&mapping->tree_lock) != head)
			goto fail;
	}

	/* Prevent deferred_split_scan() touching ->_refcount */
	spin_lock(&pgdata->split_queue_lock);
	count = page_count(head);
	mapcount = total_mapcount(head);
	/*
	 * Proceed only if nothing maps the page any more and the refcount
	 * could be frozen at the caller's pin plus the expected extra pins.
	 */
	if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
		/* Take the page off the deferred split queue if it is on it. */
		if (!list_empty(page_deferred_list(head))) {
			pgdata->split_queue_len--;
			list_del(page_deferred_list(head));
		}
		if (mapping)
			__dec_node_page_state(page, NR_SHMEM_THPS);
		spin_unlock(&pgdata->split_queue_lock);
		/* Drops the LRU lock and, for file pages, the tree_lock. */
		__split_huge_page(page, list, flags);
		ret = 0;
	} else {
		/* A page that is still mapped after freeze_page() is a bug. */
		if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
			pr_alert("total_mapcount: %u, page_count(): %u\n",
					mapcount, count);
			if (PageTail(page))
				dump_page(head, NULL);
			dump_page(page, "total_mapcount(head) > 0");
			BUG();
		}
		spin_unlock(&pgdata->split_queue_lock);
		/* Also reached directly when the radix tree lookup fails. */
fail:		if (mapping)
			spin_unlock(&mapping->tree_lock);
		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
		unfreeze_page(head);
		ret = -EBUSY;
	}

out_unlock:
	/* At most one of anon_vma and mapping is set at this point. */
	if (anon_vma) {
		anon_vma_unlock_write(anon_vma);
		put_anon_vma(anon_vma);
	}
	if (mapping)
		i_mmap_unlock_read(mapping);
out:
	/* Account the attempt as a successful or a failed split. */
	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
	return ret;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 542 98.72% 8 80.00%
mel gorman mel gorman 7 1.28% 2 20.00%
Total 549 100.00% 10 100.00%



/*
 * Free the transparent huge page @page: remove it from its node's deferred
 * split queue if it is queued there (under split_queue_lock), then hand it
 * to free_compound_page().
 */
void free_transhuge_page(struct page *page)
{
	struct pglist_data *node = NODE_DATA(page_to_nid(page));
	struct list_head *entry = page_deferred_list(page);
	unsigned long irq_state;

	spin_lock_irqsave(&node->split_queue_lock, irq_state);
	if (!list_empty(entry)) {
		node->split_queue_len--;
		list_del(entry);
	}
	spin_unlock_irqrestore(&node->split_queue_lock, irq_state);

	free_compound_page(page);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 78 100.00% 2 100.00%
Total 78 100.00% 2 100.00%



/*
 * Queue the transparent huge page @page at the tail of its node's deferred
 * split queue, unless it is already queued. A newly queued page is counted
 * as a THP_DEFERRED_SPLIT_PAGE event.
 */
void deferred_split_huge_page(struct page *page)
{
	struct pglist_data *node = NODE_DATA(page_to_nid(page));
	struct list_head *entry = page_deferred_list(page);
	unsigned long irq_state;

	VM_BUG_ON_PAGE(!PageTransHuge(page), page);

	spin_lock_irqsave(&node->split_queue_lock, irq_state);
	if (list_empty(entry)) {
		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
		list_add_tail(entry, &node->split_queue);
		node->split_queue_len++;
	}
	spin_unlock_irqrestore(&node->split_queue_lock, irq_state);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 93 100.00% 3 100.00%
Total 93 100.00% 3 100.00%



/*
 * Shrinker ->count_objects callback: report the current length of the
 * deferred split queue of the node selected by @sc->nid.
 *
 * The length is sampled without taking split_queue_lock, so it is read
 * exactly once with READ_ONCE(), the accessor used elsewhere in this file.
 */
static unsigned long deferred_split_count(struct shrinker *shrink,
		struct shrink_control *sc)
{
	struct pglist_data *pgdata = NODE_DATA(sc->nid);

	return READ_ONCE(pgdata->split_queue_len);
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 37 100.00% 2 100.00%
Total 37 100.00% 2 100.00%



static unsigned long deferred_split_scan(struct shrinker *shrink,
		struct shrink_control *sc)
{
	struct pglist_data *pgdata = NODE_DATA(sc->nid);
	unsigned long flags;
	LIST_HEAD(list), *pos, *next;
	struct page *page;
	int split = 0;

	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
	/* Take pin on all head pages to avoid freeing them under us */
	list_for_each_safe(pos, next, &pgdata->split_queue) {
		page = list_entry((void *)pos, struct page, mapping);
		page = compound_head(page);
		if (get_page_unless_zero(page)) {
			list_move(page_deferred_list(page), &list);
		} else {
			/* We lost race with put_compound_page() */
			list_del_init(page_deferred_list(page));
			pgdata->split_queue_len--;
		}
		if (!--sc->nr_to_scan)
			break;
	}
	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);

	list_for_each_safe(pos, next, &list) {
		page = list_entry((void *)pos, struct page, mapping);
		lock_page(page);
		/* split_huge_page() removes page from list on success */
		if (!split_huge_page(page))
			split++;
		unlock_page(page);
		put_page(page);
	}

	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
	list_splice_tail(&list, &pgdata->split_queue);
	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);

	/*
         * Stop shrinker if we didn't split any page, but the queue is empty.
         * This can happen if pages were freed under us.
         */
	if (!split && list_empty(&pgdata->split_queue))
		return SHRINK_STOP;
	return split;
}

Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 258 100.00% 5 100.00%
Total 258 100.00% 5 100.00%


/* NUMA-aware shrinker that works on the per-node deferred split queues. */
static struct shrinker deferred_split_shrinker = {
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE,
	.count_objects	= deferred_split_count,
	.scan_objects	= deferred_split_scan,
};

#ifdef CONFIG_DEBUG_FS


/*
 * Write handler of the "split_huge_pages" debugfs attribute.
 *
 * Only the value 1 is accepted; anything else yields -EINVAL. On 1, every
 * valid pfn of every populated zone is visited, and each page that can be
 * pinned, belongs to the zone being walked, is a compound head, is not a
 * hugetlb page and is on the LRU gets a split attempt under the page lock.
 * The number of successful splits and of attempts is logged at the end.
 *
 * Returns 0 after a scan, regardless of how many splits succeeded.
 */
static int split_huge_pages_set(void *data, u64 val)
{
	unsigned long nr_tried = 0, nr_split = 0;
	unsigned long pfn, zone_end;
	struct zone *zone;
	struct page *page;

	if (val != 1)
		return -EINVAL;

	for_each_populated_zone(zone) {
		zone_end = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < zone_end; pfn++) {
			if (!pfn_valid(pfn))
				continue;

			page = pfn_to_page(pfn);
			if (!get_page_unless_zero(page))
				continue;

			if (zone == page_zone(page) &&
			    PageHead(page) && !PageHuge(page) &&
			    PageLRU(page)) {
				nr_tried++;
				lock_page(page);
				if (split_huge_page(page) == 0)
					nr_split++;
				unlock_page(page);
			}

			/* Drop the pin taken above. */
			put_page(page);
		}
	}

	pr_info("%lu of %lu THP split\n", nr_split, nr_tried);

	return 0;
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 180 99.45% 2 66.67%
yang shi yang shi 1 0.55% 1 33.33%
Total 181 100.00% 3 100.00%

/*
 * Define split_huge_pages_fops for the debugfs file. split_huge_pages_set()
 * is the only handler supplied; the other handler argument is NULL.
 */
DEFINE_SIMPLE_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set,
		"%llu\n");



/*
 * Create the "split_huge_pages" debugfs file (mode 0200, NULL parent) backed
 * by split_huge_pages_fops.
 *
 * A creation failure is only logged; the initcall always returns 0.
 */
static int __init split_huge_pages_debugfs(void)
{
	void *ret;

	ret = debugfs_create_file("split_huge_pages", 0200, NULL, NULL,
			&split_huge_pages_fops);
	if (!ret)
		/* Terminate the message so it is emitted as a complete line. */
		pr_warn("Failed to create split_huge_pages in debugfs\n");
	return 0;
}
Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 41 97.62% 1 50.00%
yang shi yang shi 1 2.38% 1 50.00%
Total 42 100.00% 2 100.00%


/* Register split_huge_pages_debugfs() as a late initcall. */
late_initcall(split_huge_pages_debugfs);
#endif
Overall Contributors
Person Tokens Prop Commits CommitProp
kirill a. shutemov kirill a. shutemov 7626 67.94% 54 36.73%
andrea arcangeli andrea arcangeli 1778 15.84% 17 11.56%
toshi kani toshi kani 251 2.24% 1 0.68%
mel gorman mel gorman 215 1.92% 5 3.40%
aaron lu aaron lu 154 1.37% 3 2.04%
ebru akagunduz ebru akagunduz 152 1.35% 6 4.08%
aneesh kumar aneesh kumar 150 1.34% 8 5.44%
dan williams dan williams 123 1.10% 4 2.72%
jan kara jan kara 108 0.96% 1 0.68%
matthew wilcox matthew wilcox 101 0.90% 4 2.72%
hugh dickins hugh dickins 90 0.80% 2 1.36%
will deacon will deacon 68 0.61% 1 0.68%
keno fischer keno fischer 53 0.47% 1 0.68%
ben hutchings ben hutchings 46 0.41% 1 0.68%
minchan kim minchan kim 45 0.40% 2 1.36%
naoya horiguchi naoya horiguchi 33 0.29% 1 0.68%
glauber costa glauber costa 30 0.27% 1 0.68%
vlastimil babka vlastimil babka 30 0.27% 1 0.68%
li shaohua li shaohua 28 0.25% 1 0.68%
johannes weiner johannes weiner 27 0.24% 1 0.68%
sagi grimberg sagi grimberg 18 0.16% 1 0.68%
xiao guangrong xiao guangrong 13 0.12% 5 3.40%
yu zhao yu zhao 12 0.11% 1 0.68%
andi kleen andi kleen 11 0.10% 3 2.04%
andrew morton andrew morton 11 0.10% 2 1.36%
david rientjes david rientjes 11 0.10% 3 2.04%
bob liu bob liu 7 0.06% 2 1.36%
huang ying huang ying 6 0.05% 2 1.36%
joonsoo kim joonsoo kim 5 0.04% 1 0.68%
rik van riel rik van riel 4 0.04% 1 0.68%
sasha levin sasha levin 4 0.04% 2 1.36%
ralf baechle ralf baechle 3 0.03% 1 0.68%
vladimir davydov vladimir davydov 3 0.03% 1 0.68%
yang shi yang shi 2 0.02% 1 0.68%
dominik dingel dominik dingel 2 0.02% 1 0.68%
gerald schaefer gerald schaefer 1 0.01% 1 0.68%
jianguo wu jianguo wu 1 0.01% 1 0.68%
michal hocko michal hocko 1 0.01% 1 0.68%
jason low jason low 1 0.01% 1 0.68%
borislav petkov borislav petkov 1 0.01% 1 0.68%
Total 11225 100.00% 147 100.00%
Directory: mm

Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
	Person	Tokens	Prop	Commits	CommitProp
kirill a. shutemov	kirill a. shutemov	99	81.82%	6	54.55%
andrea arcangeli	andrea arcangeli	14	11.57%	2	18.18%
yu zhao	yu zhao	6	4.96%	1	9.09%
jason low	jason low	1	0.83%	1	9.09%
aaron lu	aaron lu	1	0.83%	1	9.09%
	Total	121	100.00%	11	100.00%
cregit-Linux how code gets into the kernel

Release 4.10 mm/huge_memory.c

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Contributors

Overall Contributors