Release 4.11 fs/dax.c
/*
* fs/dax.c - Direct Access filesystem code
* Copyright (c) 2013-2014 Intel Corporation
* Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
* Author: Ross Zwisler <ross.zwisler@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
static int __init init_dax_wait_table(void)
{
int i;
for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
init_waitqueue_head(wait_table + i);
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 35 | 100.00% | 1 | 100.00% |
Total | 35 | 100.00% | 1 | 100.00% |
fs_initcall(init_dax_wait_table);
static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
{
struct request_queue *q = bdev->bd_queue;
long rc = -EIO;
dax->addr = ERR_PTR(-EIO);
if (blk_queue_enter(q, true) != 0)
return rc;
rc = bdev_direct_access(bdev, dax);
if (rc < 0) {
dax->addr = ERR_PTR(rc);
blk_queue_exit(q);
return rc;
}
return rc;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Dan J Williams | 92 | 100.00% | 1 | 100.00% |
Total | 92 | 100.00% | 1 | 100.00% |
static void dax_unmap_atomic(struct block_device *bdev,
const struct blk_dax_ctl *dax)
{
if (IS_ERR(dax->addr))
return;
blk_queue_exit(bdev->bd_queue);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Dan J Williams | 34 | 100.00% | 1 | 100.00% |
Total | 34 | 100.00% | 1 | 100.00% |
static int dax_is_pmd_entry(void *entry)
{
return (unsigned long)entry & RADIX_DAX_PMD;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ross Zwisler | 19 | 100.00% | 1 | 100.00% |
Total | 19 | 100.00% | 1 | 100.00% |
static int dax_is_pte_entry(void *entry)
{
return !((unsigned long)entry & RADIX_DAX_PMD);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ross Zwisler | 22 | 100.00% | 1 | 100.00% |
Total | 22 | 100.00% | 1 | 100.00% |
static int dax_is_zero_entry(void *entry)
{
return (unsigned long)entry & RADIX_DAX_HZP;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ross Zwisler | 19 | 100.00% | 1 | 100.00% |
Total | 19 | 100.00% | 1 | 100.00% |
static int dax_is_empty_entry(void *entry)
{
return (unsigned long)entry & RADIX_DAX_EMPTY;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ross Zwisler | 19 | 100.00% | 1 | 100.00% |
Total | 19 | 100.00% | 1 | 100.00% |
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
struct page *page = alloc_pages(GFP_KERNEL, 0);
struct blk_dax_ctl dax = {
.size = PAGE_SIZE,
.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
};
long rc;
if (!page)
return ERR_PTR(-ENOMEM);
rc = dax_map_atomic(bdev, &dax);
if (rc < 0)
return ERR_PTR(rc);
memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
dax_unmap_atomic(bdev, &dax);
return page;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Dan J Williams | 121 | 100.00% | 1 | 100.00% |
Total | 121 | 100.00% | 1 | 100.00% |
/*
* DAX radix tree locking
*/
struct exceptional_entry_key {
struct address_space *mapping;
pgoff_t entry_start;
};
struct wait_exceptional_entry_queue {
wait_queue_t wait;
struct exceptional_entry_key key;
};
static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
pgoff_t index, void *entry, struct exceptional_entry_key *key)
{
unsigned long hash;
/*
* If 'entry' is a PMD, align the 'index' that we use for the wait
* queue to the start of that PMD. This ensures that all offsets in
* the range covered by the PMD map to the same bit lock.
*/
if (dax_is_pmd_entry(entry))
index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
key->mapping = mapping;
key->entry_start = index;
hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS);
return wait_table + hash;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 70 | 82.35% | 2 | 66.67% |
Dan J Williams | 15 | 17.65% | 1 | 33.33% |
Total | 85 | 100.00% | 3 | 100.00% |
static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
int sync, void *keyp)
{
struct exceptional_entry_key *key = keyp;
struct wait_exceptional_entry_queue *ewait =
container_of(wait, struct wait_exceptional_entry_queue, wait);
if (key->mapping != ewait->key.mapping ||
key->entry_start != ewait->key.entry_start)
return 0;
return autoremove_wake_function(wait, mode, sync, NULL);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 71 | 88.75% | 3 | 50.00% |
Matthew Wilcox | 5 | 6.25% | 1 | 16.67% |
Dan J Williams | 3 | 3.75% | 1 | 16.67% |
Ross Zwisler | 1 | 1.25% | 1 | 16.67% |
Total | 80 | 100.00% | 6 | 100.00% |
/*
* Check whether the given slot is locked. The function must be called with
* mapping->tree_lock held
*/
static inline int slot_locked(struct address_space *mapping, void **slot)
{
unsigned long entry = (unsigned long)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
return entry & RADIX_DAX_ENTRY_LOCK;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 29 | 72.50% | 2 | 66.67% |
Ross Zwisler | 11 | 27.50% | 1 | 33.33% |
Total | 40 | 100.00% | 3 | 100.00% |
/*
* Mark the given slot is locked. The function must be called with
* mapping->tree_lock held
*/
static inline void *lock_slot(struct address_space *mapping, void **slot)
{
unsigned long entry = (unsigned long)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
entry |= RADIX_DAX_ENTRY_LOCK;
radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
return (void *)entry;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 54 | 85.71% | 2 | 40.00% |
Ross Zwisler | 7 | 11.11% | 2 | 40.00% |
Linus Torvalds | 2 | 3.17% | 1 | 20.00% |
Total | 63 | 100.00% | 5 | 100.00% |
/*
* Mark the given slot is unlocked. The function must be called with
* mapping->tree_lock held
*/
static inline void *unlock_slot(struct address_space *mapping, void **slot)
{
unsigned long entry = (unsigned long)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
return (void *)entry;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 61 | 89.71% | 2 | 50.00% |
Linus Torvalds | 4 | 5.88% | 1 | 25.00% |
Ross Zwisler | 3 | 4.41% | 1 | 25.00% |
Total | 68 | 100.00% | 4 | 100.00% |
/*
* Lookup entry in radix tree, wait for it to become unlocked if it is
* exceptional entry and return it. The caller must call
* put_unlocked_mapping_entry() when he decided not to lock the entry or
* put_locked_mapping_entry() when he locked the entry and now wants to
* unlock it.
*
* The function must be called with mapping->tree_lock held.
*/
static void *get_unlocked_mapping_entry(struct address_space *mapping,
pgoff_t index, void ***slotp)
{
void *entry, **slot;
struct wait_exceptional_entry_queue ewait;
wait_queue_head_t *wq;
init_wait(&ewait.wait);
ewait.wait.func = wake_exceptional_entry_func;
for (;;) {
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
&slot);
if (!entry || !radix_tree_exceptional_entry(entry) ||
!slot_locked(mapping, slot)) {
if (slotp)
*slotp = slot;
return entry;
}
wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&mapping->tree_lock);
schedule();
finish_wait(wq, &ewait.wait);
spin_lock_irq(&mapping->tree_lock);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 158 | 94.61% | 2 | 66.67% |
Ross Zwisler | 9 | 5.39% | 1 | 33.33% |
Total | 167 | 100.00% | 3 | 100.00% |
static void dax_unlock_mapping_entry(struct address_space *mapping,
pgoff_t index)
{
void *entry, **slot;
spin_lock_irq(&mapping->tree_lock);
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
!slot_locked(mapping, slot))) {
spin_unlock_irq(&mapping->tree_lock);
return;
}
unlock_slot(mapping, slot);
spin_unlock_irq(&mapping->tree_lock);
dax_wake_mapping_entry_waiter(mapping, index, entry, false);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 106 | 100.00% | 1 | 100.00% |
Total | 106 | 100.00% | 1 | 100.00% |
static void put_locked_mapping_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
if (!radix_tree_exceptional_entry(entry)) {
unlock_page(entry);
put_page(entry);
} else {
dax_unlock_mapping_entry(mapping, index);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 48 | 100.00% | 2 | 100.00% |
Total | 48 | 100.00% | 2 | 100.00% |
/*
* Called when we are done with radix tree entry we looked up via
* get_unlocked_mapping_entry() and which we didn't lock in the end.
*/
static void put_unlocked_mapping_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
if (!radix_tree_exceptional_entry(entry))
return;
/* We have to wake up next waiter for the radix tree entry lock */
dax_wake_mapping_entry_waiter(mapping, index, entry, false);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 39 | 100.00% | 2 | 100.00% |
Total | 39 | 100.00% | 2 | 100.00% |
/*
* Find radix tree entry at given index. If it points to a page, return with
* the page locked. If it points to the exceptional entry, return with the
* radix tree entry locked. If the radix tree doesn't contain given index,
* create empty exceptional entry for the index and return with it locked.
*
* When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
* either return that locked entry or will return an error. This error will
* happen if there are any 4k entries (either zero pages or DAX entries)
* within the 2MiB range that we are requesting.
*
* We always favor 4k entries over 2MiB entries. There isn't a flow where we
* evict 4k entries in order to 'upgrade' them to a 2MiB entry. A 2MiB
* insertion will fail if it finds any 4k entries already in the tree, and a
* 4k insertion will cause an existing 2MiB entry to be unmapped and
* downgraded to 4k entries. This happens for both 2MiB huge zero pages as
* well as 2MiB empty entries.
*
* The exception to this downgrade path is for 2MiB DAX PMD entries that have
* real storage backing them. We will leave these real 2MiB DAX entries in
* the tree, and PTE writes will simply dirty the entire 2MiB DAX entry.
*
* Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
* persistent memory the benefit is doubtful. We can add that later if we can
* show it helps.
*/
static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
unsigned long size_flag)
{
bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
void *entry, **slot;
restart:
spin_lock_irq(&mapping->tree_lock);
entry = get_unlocked_mapping_entry(mapping, index, &slot);
if (entry) {
if (size_flag & RADIX_DAX_PMD) {
if (!radix_tree_exceptional_entry(entry) ||
dax_is_pte_entry(entry)) {
put_unlocked_mapping_entry(mapping, index,
entry);
entry = ERR_PTR(-EEXIST);
goto out_unlock;
}
} else { /* trying to grab a PTE entry */
if (radix_tree_exceptional_entry(entry) &&
dax_is_pmd_entry(entry) &&
(dax_is_zero_entry(entry) ||
dax_is_empty_entry(entry))) {
pmd_downgrade = true;
}
}
}
/* No entry for given index? Make sure radix tree is big enough. */
if (!entry || pmd_downgrade) {
int err;
if (pmd_downgrade) {
/*
* Make sure 'entry' remains valid while we drop
* mapping->tree_lock.
*/
entry = lock_slot(mapping, slot);
}
spin_unlock_irq(&mapping->tree_lock);
/*
* Besides huge zero pages the only other thing that gets
* downgraded are empty entries which don't need to be
* unmapped.
*/
if (pmd_downgrade && dax_is_zero_entry(entry))
unmap_mapping_range(mapping,
(index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
err = radix_tree_preload(
mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
if (err) {
if (pmd_downgrade)
put_locked_mapping_entry(mapping, index, entry);
return ERR_PTR(err);
}
spin_lock_irq(&mapping->tree_lock);
if (!entry) {
/*
* We needed to drop the page_tree lock while calling
* radix_tree_preload() and we didn't have an entry to
* lock. See if another thread inserted an entry at
* our index during this time.
*/
entry = __radix_tree_lookup(&mapping->page_tree, index,
NULL, &slot);
if (entry) {
radix_tree_preload_end();
spin_unlock_irq(&mapping->tree_lock);
goto restart;
}
}
if (pmd_downgrade) {
radix_tree_delete(&mapping->page_tree, index);
mapping->nrexceptional--;
dax_wake_mapping_entry_waiter(mapping, index, entry,
true);
}
entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
err = __radix_tree_insert(&mapping->page_tree, index,
dax_radix_order(entry), entry);
radix_tree_preload_end();
if (err) {
spin_unlock_irq(&mapping->tree_lock);
/*
* Our insertion of a DAX entry failed, most likely
* because we were inserting a PMD entry and it
* collided with a PTE sized entry at a different
* index in the PMD range. We haven't inserted
* anything into the radix tree and have no waiters to
* wake.
*/
return ERR_PTR(err);
}
/* Good, we have inserted empty locked entry into the tree. */
mapping->nrexceptional++;
spin_unlock_irq(&mapping->tree_lock);
return entry;
}
/* Normal page in radix tree? */
if (!radix_tree_exceptional_entry(entry)) {
struct page *page = entry;
get_page(page);
spin_unlock_irq(&mapping->tree_lock);
lock_page(page);
/* Page got truncated? Retry... */
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto restart;
}
return page;
}
entry = lock_slot(mapping, slot);
out_unlock:
spin_unlock_irq(&mapping->tree_lock);
return entry;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 405 | 83.68% | 3 | 30.00% |
Ross Zwisler | 66 | 13.64% | 4 | 40.00% |
Matthew Wilcox | 10 | 2.07% | 1 | 10.00% |
Dan J Williams | 2 | 0.41% | 1 | 10.00% |
Christoph Hellwig | 1 | 0.21% | 1 | 10.00% |
Total | 484 | 100.00% | 10 | 100.00% |
/*
* We do not necessarily hold the mapping->tree_lock when we call this
* function so it is possible that 'entry' is no longer a valid item in the
* radix tree. This is okay because all we really need to do is to find the
* correct waitqueue where tasks might be waiting for that old 'entry' and
* wake them.
*/
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, void *entry, bool wake_all)
{
struct exceptional_entry_key key;
wait_queue_head_t *wq;
wq = dax_entry_waitqueue(mapping, index, entry, &key);
/*
* Checking for locked entry and prepare_to_wait_exclusive() happens
* under mapping->tree_lock, ditto for entry handling in our callers.
* So at this point all tasks that could have seen our entry locked
* must be in the waitqueue and the following check will see them.
*/
if (waitqueue_active(wq))
__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 45 | 68.18% | 2 | 66.67% |
Ross Zwisler | 21 | 31.82% | 1 | 33.33% |
Total | 66 | 100.00% | 3 | 100.00% |
static int __dax_invalidate_mapping_entry(struct address_space *mapping,
pgoff_t index, bool trunc)
{
int ret = 0;
void *entry;
struct radix_tree_root *page_tree = &mapping->page_tree;
spin_lock_irq(&mapping->tree_lock);
entry = get_unlocked_mapping_entry(mapping, index, NULL);
if (!entry || !radix_tree_exceptional_entry(entry))
goto out;
if (!trunc &&
(radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
goto out;
radix_tree_delete(page_tree, index);
mapping->nrexceptional--;
ret = 1;
out:
put_unlocked_mapping_entry(mapping, index, entry);
spin_unlock_irq(&mapping->tree_lock);
return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 106 | 78.52% | 2 | 50.00% |
Ross Zwisler | 29 | 21.48% | 2 | 50.00% |
Total | 135 | 100.00% | 4 | 100.00% |
/*
* Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
* entry to get unlocked before deleting it.
*/
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
int ret = __dax_invalidate_mapping_entry(mapping, index, true);
/*
* This gets called from truncate / punch_hole path. As such, the caller
* must hold locks protecting against concurrent modifications of the
* radix tree (usually fs-private i_mmap_sem for writing). Since the
* caller has seen exceptional entry for this index, we better find it
* at that index as well...
*/
WARN_ON_ONCE(!ret);
return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 34 | 97.14% | 1 | 50.00% |
Ross Zwisler | 1 | 2.86% | 1 | 50.00% |
Total | 35 | 100.00% | 2 | 100.00% |
/*
* Invalidate exceptional DAX entry if easily possible. This handles DAX
* entries for invalidate_inode_pages() so we evict the entry only if we can
* do so without blocking.
*/
int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
{
int ret = 0;
void *entry, **slot;
struct radix_tree_root *page_tree = &mapping->page_tree;
spin_lock_irq(&mapping->tree_lock);
entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
if (!entry || !radix_tree_exceptional_entry(entry) ||
slot_locked(mapping, slot))
goto out;
if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
goto out;
radix_tree_delete(page_tree, index);
mapping->nrexceptional--;
ret = 1;
out:
spin_unlock_irq(&mapping->tree_lock);
if (ret)
dax_wake_mapping_entry_waiter(mapping, index, entry, true);
return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 137 | 93.84% | 3 | 60.00% |
Ross Zwisler | 7 | 4.79% | 1 | 20.00% |
Johannes Weiner | 2 | 1.37% | 1 | 20.00% |
Total | 146 | 100.00% | 5 | 100.00% |
/*
* Invalidate exceptional DAX entry if it is clean.
*/
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index)
{
return __dax_invalidate_mapping_entry(mapping, index, false);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 23 | 100.00% | 3 | 100.00% |
Total | 23 | 100.00% | 3 | 100.00% |
/*
* The user has performed a load from a hole in the file. Allocating
* a new page in the file would cause excessive storage usage for
* workloads with sparse files. We allocate a page cache page instead.
* We'll kick it out of the page cache if it's ever written to,
* otherwise it will simply fall out of the page cache under memory
* pressure without ever having been dirtied.
*/
static int dax_load_hole(struct address_space *mapping, void **entry,
struct vm_fault *vmf)
{
struct page *page;
int ret;
/* Hole page already exists? Return it... */
if (!radix_tree_exceptional_entry(*entry)) {
page = *entry;
goto out;
}
/* This will replace locked radix tree entry with a hole page */
page = find_or_create_page(mapping, vmf->pgoff,
vmf->gfp_mask | __GFP_ZERO);
if (!page)
return VM_FAULT_OOM;
out:
vmf->page = page;
ret = finish_fault(vmf);
vmf->page = NULL;
*entry = page;
if (!ret) {
/* Grab reference for PTE that is now referencing the page */
get_page(page);
return VM_FAULT_NOPAGE;
}
return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 107 | 89.17% | 3 | 75.00% |
Ross Zwisler | 13 | 10.83% | 1 | 25.00% |
Total | 120 | 100.00% | 4 | 100.00% |
static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
struct page *to, unsigned long vaddr)
{
struct blk_dax_ctl dax = {
.sector = sector,
.size = size,
};
void *vto;
if (dax_map_atomic(bdev, &dax) < 0)
return PTR_ERR(dax.addr);
vto = kmap_atomic(to);
copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
kunmap_atomic(vto);
dax_unmap_atomic(bdev, &dax);
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ross Zwisler | 66 | 61.68% | 1 | 50.00% |
Jan Kara | 41 | 38.32% | 1 | 50.00% |
Total | 107 | 100.00% | 2 | 100.00% |
/*
* By this point grab_mapping_entry() has ensured that we have a locked entry
* of the appropriate size so we don't have to worry about downgrading PMDs to
* PTEs. If we happen to be trying to insert a PTE and there is a PMD
* already in the tree, we will skip the insertion and just dirty the PMD as
* appropriate.
*/
static void *dax_insert_mapping_entry(struct address_space *mapping,
struct vm_fault *vmf,
void *entry, sector_t sector,
unsigned long flags)
{
struct radix_tree_root *page_tree = &mapping->page_tree;
int error = 0;
bool hole_fill = false;
void *new_entry;
pgoff_t index = vmf->pgoff;
if (vmf->flags & FAULT_FLAG_WRITE)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
/* Replacing hole page with block mapping? */
if (!radix_tree_exceptional_entry(entry)) {
hole_fill = true;
/*
* Unmap the page now before we remove it from page cache below.
* The page is locked so it cannot be faulted in again.
*/
unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
PAGE_SIZE, 0);
error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
if (error)
return ERR_PTR(error);
} else if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_HZP)) {
/* replacing huge zero page with PMD block mapping */
unmap_mapping_range(mapping,
(vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
}
spin_lock_irq(&mapping->tree_lock);
new_entry = dax_radix_locked_entry(sector, flags);
if (hole_fill) {
__delete_from_page_cache(entry, NULL);
/* Drop pagecache reference */
put_page(entry);
error = __radix_tree_insert(page_tree, index,
dax_radix_order(new_entry), new_entry);
if (error) {
new_entry = ERR_PTR(error);
goto unlock;
}
mapping->nrexceptional++;
} else if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
/*
* Only swap our new entry into the radix tree if the current
* entry is a zero page or an empty entry. If a normal PTE or
* PMD entry is already in the tree, we leave it alone. This
* means that if we are trying to insert a PTE and the
* existing entry is a PMD, we will just leave the PMD in the
* tree and dirty it if necessary.
*/
struct radix_tree_node *node;
void **slot;
void *ret;
ret = __radix_tree_lookup(page_tree, index, &node, &slot);
WARN_ON_ONCE(ret != entry);
__radix_tree_replace(page_tree, node, slot,
new_entry, NULL, NULL);
}
if (vmf->flags & FAULT_FLAG_WRITE)
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
unlock:
spin_unlock_irq(&mapping->tree_lock);
if (hole_fill) {
radix_tree_preload_end();
/*
* We don't need hole page anymore, it has been replaced with
* locked radix tree entry now.
*/
if (mapping->a_ops->freepage)
mapping->a_ops->freepage(entry);
unlock_page(entry);
put_page(entry);
}
return new_entry;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 219 | 58.71% | 2 | 25.00% |
Ross Zwisler | 113 | 30.29% | 2 | 25.00% |
Dan J Williams | 15 | 4.02% | 1 | 12.50% |
Matthew Wilcox | 13 | 3.49% | 1 | 12.50% |
Linus Torvalds | 12 | 3.22% | 1 | 12.50% |
Kirill A. Shutemov | 1 | 0.27% | 1 | 12.50% |
Total | 373 | 100.00% | 8 | 100.00% |
static inline unsigned long
pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
unsigned long address;
address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
return address;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 58 | 100.00% | 1 | 100.00% |
Total | 58 | 100.00% | 1 | 100.00% |
/* Walk all mappings of a given index of a file and writeprotect them */
static void dax_mapping_entry_mkclean(struct address_space *mapping,
pgoff_t index, unsigned long pfn)
{
struct vm_area_struct *vma;
pte_t pte, *ptep = NULL;
pmd_t *pmdp = NULL;
spinlock_t *ptl;
bool changed;
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
unsigned long address;
cond_resched();
if (!(vma->vm_flags & VM_SHARED))
continue;
address = pgoff_address(index, vma);
changed = false;
if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
continue;
if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
pmd_t pmd;
if (pfn != pmd_pfn(*pmdp))
goto unlock_pmd;
if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
pmd = pmdp_huge_clear_flush(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
changed = true;
unlock_pmd:
spin_unlock(ptl);
#endif
} else {
if (pfn != pte_pfn(*ptep))
goto unlock_pte;
if (!pte_dirty(*ptep) && !pte_write(*ptep))
goto unlock_pte;
flush_cache_page(vma, address, pfn);
pte = ptep_clear_flush(vma, address, ptep);
pte = pte_wrprotect(pte);
pte = pte_mkclean(pte);
set_pte_at(vma->vm_mm, address, ptep, pte);
changed = true;
unlock_pte:
pte_unmap_unlock(ptep, ptl);
}
if (changed)
mmu_notifier_invalidate_page(vma->vm_mm, address);
}
i_mmap_unlock_read(mapping);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Jan Kara | 208 | 62.84% | 1 | 50.00% |
Ross Zwisler | 123 | 37.16% | 1 | 50.00% |
Total | 331 | 100.00% | 2 | 100.00% |
static int