Release 4.7 fs/dax.c
/*
* fs/dax.c - Direct Access filesystem code
* Copyright (c) 2013-2014 Intel Corporation
* Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
* Author: Ross Zwisler <ross.zwisler@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
/*
* We use lowest available bit in exceptional entry for locking, other two
* bits to determine entry type. In total 3 special bits.
*/
/* Number of low bits of an exceptional entry that do not hold the sector. */
#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 3)
/* Entry type bits: exactly one of PTE / PMD is set by RADIX_DAX_ENTRY(). */
#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD)
/*
 * Accessors and constructor for DAX exceptional entries.  Every macro
 * argument is parenthesized so that an expression argument (for example
 * "a | b") is evaluated as a whole before the cast, mask or shift applies.
 */
#define RADIX_DAX_TYPE(entry) ((unsigned long)(entry) & RADIX_DAX_TYPE_MASK)
#define RADIX_DAX_SECTOR(entry) ((unsigned long)(entry) >> RADIX_DAX_SHIFT)
#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)(sector) << \
		RADIX_DAX_SHIFT | ((pmd) ? RADIX_DAX_PMD : RADIX_DAX_PTE) | \
		RADIX_TREE_EXCEPTIONAL_ENTRY))
/* 2^12 = 4096 wait queues - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

/* Initialise every wait queue head in wait_table; always returns 0. */
static int __init init_dax_wait_table(void)
{
	int slot = 0;

	while (slot < DAX_WAIT_TABLE_ENTRIES) {
		init_waitqueue_head(&wait_table[slot]);
		slot++;
	}

	return 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 35 | 100.00% | 1 | 100.00% |
| Total | 35 | 100.00% | 1 | 100.00% |
/* Register init_dax_wait_table() to be run as an fs-level initcall. */
fs_initcall(init_dax_wait_table);
/*
 * Select the wait queue used by waiters on the entry at @index of @mapping.
 * The mapping pointer XORed with the index is hashed down to
 * DAX_WAIT_TABLE_BITS bits, and that value indexes wait_table.
 */
static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
					      pgoff_t index)
{
	unsigned long key = (unsigned long)mapping ^ index;

	return &wait_table[hash_long(key, DAX_WAIT_TABLE_BITS)];
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 37 | 100.00% | 1 | 100.00% |
| Total | 37 | 100.00% | 1 | 100.00% |
/*
 * Enter the request queue of @bdev and ask the device for direct access to
 * the range described by @dax.
 *
 * Returns the (non-negative) result of bdev_direct_access() on success; in
 * that case the queue reference taken here is kept and is dropped later by
 * dax_unmap_atomic().  On failure a negative errno is returned, the queue
 * reference (if any) has been dropped and dax->addr holds the matching
 * error pointer.
 */
static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
{
	struct request_queue *queue = bdev->bd_queue;
	long mapped;

	/* Until a mapping succeeds, addr carries an error pointer. */
	dax->addr = (void __pmem *) ERR_PTR(-EIO);
	if (blk_queue_enter(queue, true))
		return -EIO;

	mapped = bdev_direct_access(bdev, dax);
	if (mapped >= 0)
		return mapped;

	/* Mapping failed: record the reason and release the queue again. */
	dax->addr = (void __pmem *) ERR_PTR(mapped);
	blk_queue_exit(queue);
	return mapped;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
dan williams | dan williams | 102 | 100.00% | 1 | 100.00% |
| Total | 102 | 100.00% | 1 | 100.00% |
/*
 * Counterpart of dax_map_atomic(): leave the request queue of @bdev, but
 * only if @dax holds a real mapping.  An error pointer in dax->addr means
 * there is nothing to release.
 */
static void dax_unmap_atomic(struct block_device *bdev,
			     const struct blk_dax_ctl *dax)
{
	if (!IS_ERR(dax->addr))
		blk_queue_exit(bdev->bd_queue);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
dan williams | dan williams | 34 | 100.00% | 1 | 100.00% |
| Total | 34 | 100.00% | 1 | 100.00% |
/*
 * read_dax_sector - copy the page-sized region containing sector @n
 * @bdev: block device to read from
 * @n: sector number; rounded down to a PAGE_SIZE boundary before mapping
 *
 * Allocates one page with GFP_KERNEL, maps the device range through
 * dax_map_atomic() and copies PAGE_SIZE bytes into the new page.
 *
 * Returns the filled page on success (the caller owns it), ERR_PTR(-ENOMEM)
 * if the page allocation fails, or ERR_PTR() of the mapping error.  The
 * page is freed again when the mapping fails, so nothing leaks on error.
 */
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
	struct page *page = alloc_pages(GFP_KERNEL, 0);
	struct blk_dax_ctl dax = {
		.size = PAGE_SIZE,
		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
	};
	long rc;

	if (!page)
		return ERR_PTR(-ENOMEM);

	rc = dax_map_atomic(bdev, &dax);
	if (rc < 0) {
		/* Nobody else knows about the page yet; give it back. */
		__free_pages(page, 0);
		return ERR_PTR(rc);
	}

	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
	dax_unmap_atomic(bdev, &dax);
	return page;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
dan williams | dan williams | 121 | 100.00% | 1 | 100.00% |
| Total | 121 | 100.00% | 1 | 100.00% |
static bool buffer_written(struct buffer_head *bh)
{
return buffer_mapped(bh) && !buffer_unwritten(bh);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
matthew wilcox | matthew wilcox | 23 | 100.00% | 1 | 100.00% |
| Total | 23 | 100.00% | 1 | 100.00% |
/*
* When ext4 encounters a hole, it returns without modifying the buffer_head
* which means that we can't trust b_size. To cope with this, we set b_state
* to 0 before calling get_block and, if any bit is set, we know we can trust
* b_size. Unfortunate, really, since ext4 knows precisely how long a hole is
* and would save us time calling get_block repeatedly.
*/
static bool buffer_size_valid(struct buffer_head *bh)
{
return bh->b_state != 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
matthew wilcox | matthew wilcox | 18 | 100.00% | 1 | 100.00% |
| Total | 18 | 100.00% | 1 | 100.00% |
/*
 * Convert the block number held in @bh into a 512-byte sector number,
 * using the block size (as a bit count) of @inode.
 */
static sector_t to_sector(const struct buffer_head *bh,
			  const struct inode *inode)
{
	/* log2 of the number of 512-byte sectors in one block */
	unsigned int shift = inode->i_blkbits - 9;

	return bh->b_blocknr << shift;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
dan williams | dan williams | 36 | 100.00% | 1 | 100.00% |
| Total | 36 | 100.00% | 1 | 100.00% |
/*
 * dax_io - copy data between @iter and the file range [@start, @end)
 * @inode: file being read or written
 * @iter: source (writes) or destination (reads) of the data
 * @start: first file offset of the transfer
 * @end: one past the last file offset; clamped to i_size for reads
 * @get_block: filesystem callback translating file blocks to device blocks
 * @bh: scratch buffer_head used for the get_block calls
 *
 * The range is walked extent by extent: get_block describes the next
 * extent, the extent is mapped with dax_map_atomic() and the data is
 * copied directly to or from the mapping.  Reads of holes or unwritten
 * extents are satisfied with zeroes.
 *
 * Returns the number of bytes transferred if any were, otherwise the error
 * that stopped the transfer.  If the range is empty (for example a read
 * that starts at or beyond i_size) the loop never runs and 0 is returned;
 * rc is initialised for exactly that case so that no indeterminate value
 * is ever returned.
 */
static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
		      loff_t start, loff_t end, get_block_t get_block,
		      struct buffer_head *bh)
{
	loff_t pos = start, max = start, bh_max = start;
	bool hole = false, need_wmb = false;
	struct block_device *bdev = NULL;
	int rw = iov_iter_rw(iter);
	int rc = 0;
	long map_len = 0;
	struct blk_dax_ctl dax = {
		.addr = (void __pmem *) ERR_PTR(-EIO),
	};
	unsigned blkbits = inode->i_blkbits;
	sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
								>> blkbits;

	if (rw == READ)
		end = min(end, i_size_read(inode));

	while (pos < end) {
		size_t len;

		/* Current mapping exhausted (or first pass): set up the next */
		if (pos == max) {
			long page = pos >> PAGE_SHIFT;
			sector_t block = page << (PAGE_SHIFT - blkbits);
			unsigned first = pos - (block << blkbits);
			long size;

			if (pos == bh_max) {
				/* Extent from get_block used up: ask again */
				bh->b_size = PAGE_ALIGN(end - pos);
				bh->b_state = 0;
				rc = get_block(inode, block, bh, rw == WRITE);
				if (rc)
					break;
				if (!buffer_size_valid(bh))
					bh->b_size = 1 << blkbits;
				bh_max = pos - first + bh->b_size;
				bdev = bh->b_bdev;
				/*
				 * We allow uninitialized buffers for writes
				 * beyond EOF as those cannot race with faults
				 */
				WARN_ON_ONCE(
					(buffer_new(bh) && block < file_blks) ||
					(rw == WRITE && buffer_unwritten(bh)));
			} else {
				/* Advance within the extent already in @bh */
				unsigned done = bh->b_size -
						(bh_max - (pos - first));
				bh->b_blocknr += done >> blkbits;
				bh->b_size -= done;
			}

			hole = rw == READ && !buffer_written(bh);
			if (hole) {
				size = bh->b_size - first;
			} else {
				/* Drop the previous mapping, then map anew */
				dax_unmap_atomic(bdev, &dax);
				dax.sector = to_sector(bh, inode);
				dax.size = bh->b_size;
				map_len = dax_map_atomic(bdev, &dax);
				if (map_len < 0) {
					rc = map_len;
					break;
				}
				dax.addr += first;
				size = map_len - first;
			}
			/*
			 * pos + size is one past the last offset for IO,
			 * so pos + size can overflow loff_t at extreme offsets.
			 * Cast to u64 to catch this and get the true minimum.
			 */
			max = min_t(u64, pos + size, end);
		}

		if (iov_iter_rw(iter) == WRITE) {
			len = copy_from_iter_pmem(dax.addr, max - pos, iter);
			need_wmb = true;
		} else if (!hole)
			len = copy_to_iter((void __force *) dax.addr, max - pos,
					iter);
		else
			len = iov_iter_zero(max - pos, iter);

		/* No progress at all is reported as a fault */
		if (!len) {
			rc = -EFAULT;
			break;
		}

		pos += len;
		if (!IS_ERR(dax.addr))
			dax.addr += len;
	}

	if (need_wmb)
		wmb_pmem();
	dax_unmap_atomic(bdev, &dax);

	return (pos == start) ? rc : pos - start;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
matthew wilcox | matthew wilcox | 328 | 58.47% | 1 | 11.11% |
dan williams | dan williams | 124 | 22.10% | 1 | 11.11% |
jan kara | jan kara | 53 | 9.45% | 1 | 11.11% |
ross zwisler | ross zwisler | 25 | 4.46% | 2 | 22.22% |
jeff moyer | jeff moyer | 13 | 2.32% | 1 | 11.11% |
omar sandoval | omar sandoval | 8 | 1.43% | 1 | 11.11% |
al viro | al viro | 6 | 1.07% | 1 | 11.11% |
eric sandeen | eric sandeen | 4 | 0.71% | 1 | 11.11% |
| Total | 561 | 100.00% | 9 | 100.00% |
/**
* dax_do_io - Perform I/O to a DAX file
* @iocb: The control block for this I/O
* @inode: The file which the I/O is directed at
* @iter: The addresses to do I/O from or to
* @get_block: The filesystem method used to translate file offsets to blocks
* @end_io: A filesystem callback for I/O completion
* @flags: See below
*
* This function uses the same locking scheme as do_blockdev_direct_IO:
* If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
* caller for writes. For reads, we take and release the i_mutex ourselves.
* If DIO_LOCKING is not set, the filesystem takes care of its own locking.
* As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
* is in progress.
*/
ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
		  struct iov_iter *iter, get_block_t get_block,
		  dio_iodone_t end_io, int flags)
{
	const loff_t first = iocb->ki_pos;
	const loff_t last = first + iov_iter_count(iter);
	/* Only reads under DIO_LOCKING take the inode lock in here */
	const bool locked_read = (flags & DIO_LOCKING) &&
				 iov_iter_rw(iter) == READ;
	const bool count_dio = !(flags & DIO_SKIP_DIO_COUNT);
	struct buffer_head bh;
	ssize_t done;

	memset(&bh, 0, sizeof(bh));
	bh.b_bdev = inode->i_sb->s_bdev;

	if (locked_read)
		inode_lock(inode);

	/* Protects against truncate */
	if (count_dio)
		inode_dio_begin(inode);

	done = dax_io(inode, iter, first, last, get_block, &bh);

	if (locked_read)
		inode_unlock(inode);

	/* A failing completion callback overrides the I/O result */
	if (end_io) {
		int err = end_io(iocb, first, done, bh.b_private);

		if (err)
			done = err;
	}

	if (count_dio)
		inode_dio_end(inode);

	return done;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
matthew wilcox | matthew wilcox | 157 | 78.11% | 2 | 25.00% |
christoph hellwig | christoph hellwig | 22 | 10.95% | 2 | 25.00% |
ross zwisler | ross zwisler | 10 | 4.98% | 1 | 12.50% |
omar sandoval | omar sandoval | 8 | 3.98% | 1 | 12.50% |
al viro | al viro | 2 | 1.00% | 1 | 12.50% |
jens axboe | jens axboe | 2 | 1.00% | 1 | 12.50% |
| Total | 201 | 100.00% | 8 | 100.00% |
/* Export dax_do_io() for use by GPL-compatible modules. */
EXPORT_SYMBOL_GPL(dax_do_io);
/*
* DAX radix tree locking
*/
/*
 * Identifies one radix tree entry by the address_space it belongs to and
 * its index.  wake_exceptional_entry_func() compares the key passed to the
 * wakeup against the key stored by each waiter.
 */
struct exceptional_entry_key {
struct address_space *mapping;
unsigned long index;
};
/*
 * Wait queue element used while waiting for a radix tree entry: the
 * generic wait_queue_t plus the key of the entry being waited for.
 */
struct wait_exceptional_entry_queue {
wait_queue_t wait;
struct exceptional_entry_key key;
};
/*
 * Wake function installed on waiters for a radix tree entry.  A waiter is
 * woken (through autoremove_wake_function()) only when the key passed to
 * the wakeup names the same mapping and index it is waiting for; any other
 * waiter that happens to share the wait queue is skipped by returning 0.
 */
static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
				       int sync, void *keyp)
{
	struct wait_exceptional_entry_queue *waiter =
		container_of(wait, struct wait_exceptional_entry_queue, wait);
	struct exceptional_entry_key *woken = keyp;

	if (woken->mapping == waiter->key.mapping &&
	    woken->index == waiter->key.index)
		return autoremove_wake_function(wait, mode, sync, NULL);

	return 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 63 | 78.75% | 1 | 50.00% |
matthew wilcox | matthew wilcox | 17 | 21.25% | 1 | 50.00% |
| Total | 80 | 100.00% | 2 | 100.00% |
/*
 * Report whether the entry stored in @slot has its lock bit set (non-zero
 * if so).  The caller must hold mapping->tree_lock.
 */
static inline int slot_locked(struct address_space *mapping, void **slot)
{
	void *raw = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);

	return (unsigned long)raw & RADIX_DAX_ENTRY_LOCK;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 25 | 62.50% | 1 | 33.33% |
matthew wilcox | matthew wilcox | 9 | 22.50% | 1 | 33.33% |
dan williams | dan williams | 6 | 15.00% | 1 | 33.33% |
| Total | 40 | 100.00% | 3 | 100.00% |
/*
 * Set the lock bit of the entry stored in @slot and return the updated
 * entry.  The caller must hold mapping->tree_lock.
 */
static inline void *lock_slot(struct address_space *mapping, void **slot)
{
	void *cur = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
	void *locked = (void *)((unsigned long)cur | RADIX_DAX_ENTRY_LOCK);

	radix_tree_replace_slot(slot, locked);
	return locked;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 54 | 93.10% | 1 | 25.00% |
matthew wilcox | matthew wilcox | 2 | 3.45% | 1 | 25.00% |
ross zwisler | ross zwisler | 1 | 1.72% | 1 | 25.00% |
dan williams | dan williams | 1 | 1.72% | 1 | 25.00% |
| Total | 58 | 100.00% | 4 | 100.00% |
/*
 * Clear the lock bit of the entry stored in @slot and return the updated
 * entry.  The caller must hold mapping->tree_lock.
 */
static inline void *unlock_slot(struct address_space *mapping, void **slot)
{
	unsigned long bits = (unsigned long)
		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
	void *unlocked = (void *)(bits & ~(unsigned long)RADIX_DAX_ENTRY_LOCK);

	radix_tree_replace_slot(slot, unlocked);
	return unlocked;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 63 | 100.00% | 1 | 100.00% |
| Total | 63 | 100.00% | 1 | 100.00% |
/*
 * Look up the entry at @index in the radix tree of @mapping.  If it is an
 * exceptional entry that is currently locked, sleep until it is unlocked
 * and look it up again.  The entry that is finally found (possibly NULL or
 * a regular page pointer) is returned, and its slot is stored in *@slotp
 * when @slotp is not NULL.
 *
 * The caller must call put_unlocked_mapping_entry() when it decides not to
 * lock the entry, or put_locked_mapping_entry() when it locked the entry
 * and now wants to unlock it.
 *
 * The function must be called with mapping->tree_lock held.  The lock is
 * dropped while sleeping and is held again on return.
 */
static void *get_unlocked_mapping_entry(struct address_space *mapping,
pgoff_t index, void ***slotp)
{
void *ret, **slot;
struct wait_exceptional_entry_queue ewait;
wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
/* Prepare a waiter that is woken only for this (mapping, index) pair */
init_wait(&ewait.wait);
ewait.wait.func = wake_exceptional_entry_func;
ewait.key.mapping = mapping;
ewait.key.index = index;
for (;;) {
ret = __radix_tree_lookup(&mapping->page_tree, index, NULL,
&slot);
/* Nothing there, a regular page, or an unlocked entry: done */
if (!ret || !radix_tree_exceptional_entry(ret) ||
!slot_locked(mapping, slot)) {
if (slotp)
*slotp = slot;
return ret;
}
/*
 * The entry is locked.  Queue ourselves as an exclusive waiter while
 * still holding tree_lock, then drop the lock and sleep.
 */
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&mapping->tree_lock);
schedule();
finish_wait(wq, &ewait.wait);
/* Retake the lock and repeat the lookup from scratch */
spin_lock_irq(&mapping->tree_lock);
}
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 174 | 100.00% | 1 | 100.00% |
| Total | 174 | 100.00% | 1 | 100.00% |
/*
 * Find radix tree entry at given index. If it points to a page, return with
 * the page locked. If it points to the exceptional entry, return with the
 * radix tree entry locked. If the radix tree doesn't contain given index,
 * create empty exceptional entry for the index and return with it locked.
 *
 * Returns the page or the locked entry on success, or an ERR_PTR() when
 * radix_tree_preload() or radix_tree_insert() fails with anything other
 * than -EEXIST.
 *
 * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
 * persistent memory the benefit is doubtful. We can add that later if we can
 * show it helps.
 */
static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
{
void *ret, **slot;
restart:
spin_lock_irq(&mapping->tree_lock);
ret = get_unlocked_mapping_entry(mapping, index, &slot);
/* No entry for given index? Make sure radix tree is big enough. */
if (!ret) {
int err;
/* The preload is done with tree_lock dropped */
spin_unlock_irq(&mapping->tree_lock);
err = radix_tree_preload(
mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
if (err)
return ERR_PTR(err);
/* Empty exceptional entry that is already marked locked */
ret = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
RADIX_DAX_ENTRY_LOCK);
spin_lock_irq(&mapping->tree_lock);
err = radix_tree_insert(&mapping->page_tree, index, ret);
radix_tree_preload_end();
if (err) {
spin_unlock_irq(&mapping->tree_lock);
/* Someone already created the entry? */
if (err == -EEXIST)
goto restart;
return ERR_PTR(err);
}
/* Good, we have inserted empty locked entry into the tree. */
mapping->nrexceptional++;
spin_unlock_irq(&mapping->tree_lock);
return ret;
}
/* Normal page in radix tree? */
if (!radix_tree_exceptional_entry(ret)) {
struct page *page = ret;
/* Take a reference before dropping tree_lock, then lock the page */
get_page(page);
spin_unlock_irq(&mapping->tree_lock);
lock_page(page);
/* Page got truncated? Retry... */
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto restart;
}
return page;
}
/* Unlocked exceptional entry: lock it and hand it back */
ret = lock_slot(mapping, slot);
spin_unlock_irq(&mapping->tree_lock);
return ret;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 258 | 100.00% | 1 | 100.00% |
| Total | 258 | 100.00% | 1 | 100.00% |
/*
 * Wake tasks waiting for the radix tree entry at @index of @mapping.
 * With @wake_all set every matching waiter is woken, otherwise only one
 * exclusive waiter is.
 */
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
				   pgoff_t index, bool wake_all)
{
	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
	struct exceptional_entry_key key;
	int nr_exclusive;

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under mapping->tree_lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * must be in the waitqueue and the following check will see them.
	 */
	if (!waitqueue_active(wq))
		return;

	key.mapping = mapping;
	key.index = index;

	/* __wake_up() is given 0 to wake all waiters, 1 to wake a single one */
	if (wake_all)
		nr_exclusive = 0;
	else
		nr_exclusive = 1;

	__wake_up(wq, TASK_NORMAL, nr_exclusive, &key);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 69 | 100.00% | 1 | 100.00% |
| Total | 69 | 100.00% | 1 | 100.00% |
/*
 * Clear the lock bit of the exceptional entry at @index of @mapping and
 * wake one waiter.  Warns once and does nothing if the index holds no
 * entry, a non-exceptional entry, or an entry that is not locked.
 */
void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	void *entry, **slot;
	bool bogus;

	spin_lock_irq(&mapping->tree_lock);
	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
	bogus = !entry || !radix_tree_exceptional_entry(entry) ||
		!slot_locked(mapping, slot);
	if (WARN_ON_ONCE(bogus)) {
		spin_unlock_irq(&mapping->tree_lock);
		return;
	}

	unlock_slot(mapping, slot);
	spin_unlock_irq(&mapping->tree_lock);

	dax_wake_mapping_entry_waiter(mapping, index, false);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 103 | 100.00% | 2 | 100.00% |
| Total | 103 | 100.00% | 2 | 100.00% |
/*
 * Release an entry that was returned locked: an exceptional entry has its
 * radix tree lock bit dropped, a page is unlocked and its reference put.
 */
static void put_locked_mapping_entry(struct address_space *mapping,
				     pgoff_t index, void *entry)
{
	if (radix_tree_exceptional_entry(entry)) {
		dax_unlock_mapping_entry(mapping, index);
		return;
	}

	unlock_page(entry);
	put_page(entry);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 48 | 100.00% | 2 | 100.00% |
| Total | 48 | 100.00% | 2 | 100.00% |
/*
 * Finish with an entry that was obtained from get_unlocked_mapping_entry()
 * but was not locked afterwards.  For an exceptional entry the next waiter
 * for the radix tree entry lock has to be woken; nothing is done for any
 * other kind of entry.
 */
static void put_unlocked_mapping_entry(struct address_space *mapping,
				       pgoff_t index, void *entry)
{
	if (radix_tree_exceptional_entry(entry))
		dax_wake_mapping_entry_waiter(mapping, index, false);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 37 | 100.00% | 1 | 100.00% |
| Total | 37 | 100.00% | 1 | 100.00% |
/*
 * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
 * entry to get unlocked before deleting it.
 *
 * Returns 1 when an entry was deleted, 0 (after a one-time warning) when
 * the index held no exceptional entry.
 */
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	void *victim;
	int deleted = 0;

	spin_lock_irq(&mapping->tree_lock);
	victim = get_unlocked_mapping_entry(mapping, index, NULL);
	/*
	 * This gets called from truncate / punch_hole path. As such, the caller
	 * must hold locks protecting against concurrent modifications of the
	 * radix tree (usually fs-private i_mmap_sem for writing). Since the
	 * caller has seen exceptional entry for this index, we better find it
	 * at that index as well...
	 */
	if (!WARN_ON_ONCE(!victim || !radix_tree_exceptional_entry(victim))) {
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
		deleted = 1;
	}
	spin_unlock_irq(&mapping->tree_lock);

	/* Everybody waiting on the now deleted entry has to look again */
	if (deleted)
		dax_wake_mapping_entry_waiter(mapping, index, true);

	return deleted;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 99 | 100.00% | 1 | 100.00% |
| Total | 99 | 100.00% | 1 | 100.00% |
/*
 * The user has performed a load from a hole in the file. Allocating
 * a new page in the file would cause excessive storage usage for
 * workloads with sparse files. We allocate a page cache page instead.
 * We'll kick it out of the page cache if it's ever written to,
 * otherwise it will simply fall out of the page cache under memory
 * pressure without ever having been dirtied.
 *
 * Returns VM_FAULT_LOCKED with vmf->page set, or VM_FAULT_OOM (after
 * releasing the locked @entry) when no page could be obtained.
 */
static int dax_load_hole(struct address_space *mapping, void *entry,
			 struct vm_fault *vmf)
{
	struct page *hole_page;

	if (radix_tree_exceptional_entry(entry)) {
		/* This will replace locked radix tree entry with a hole page */
		hole_page = find_or_create_page(mapping, vmf->pgoff,
						vmf->gfp_mask | __GFP_ZERO);
		if (!hole_page) {
			put_locked_mapping_entry(mapping, vmf->pgoff, entry);
			return VM_FAULT_OOM;
		}
	} else {
		/* Hole page already exists: hand it out as it is */
		hole_page = entry;
	}

	vmf->page = hole_page;
	return VM_FAULT_LOCKED;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 93 | 100.00% | 1 | 100.00% |
| Total | 93 | 100.00% | 1 | 100.00% |
/*
 * Copy the data of the block range described by @bh into page @to, for a
 * user mapping at @vaddr.  The device range is mapped only for the
 * duration of the copy.  Returns 0 on success or the error recorded in
 * dax.addr when the mapping fails.
 */
static int copy_user_bh(struct page *to, struct inode *inode,
			struct buffer_head *bh, unsigned long vaddr)
{
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	void *dst;
	long mapped;

	mapped = dax_map_atomic(bdev, &dax);
	if (mapped < 0)
		return PTR_ERR(dax.addr);

	dst = kmap_atomic(to);
	copy_user_page(dst, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(dst);

	dax_unmap_atomic(bdev, &dax);
	return 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 61 | 50.00% | 1 | 20.00% |
ross zwisler | ross zwisler | 25 | 20.49% | 2 | 40.00% |
matthew wilcox | matthew wilcox | 24 | 19.67% | 1 | 20.00% |
dan williams | dan williams | 12 | 9.84% | 1 | 20.00% |
| Total | 122 | 100.00% | 5 | 100.00% |
/*
 * Mask a page index with PMD_MASK expressed in units of pages.  The
 * argument is parenthesized so that an expression such as "a | b" is
 * masked as a whole.
 */
#define DAX_PMD_INDEX(page_index) ((page_index) & (PMD_MASK >> PAGE_SHIFT))
/*
 * Install a locked PTE-type exceptional entry describing @sector at index
 * vmf->pgoff of @mapping.  @entry is what currently occupies that index:
 * either a hole page (which is unmapped, removed from the page cache and
 * released here) or an exceptional entry (whose slot is overwritten).
 *
 * For write faults the inode is marked dirty and the index is tagged
 * PAGECACHE_TAG_DIRTY.  Returns the new entry, or an ERR_PTR() when
 * radix_tree_preload() or radix_tree_insert() fails.
 */
static void *dax_insert_mapping_entry(struct address_space *mapping,
struct vm_fault *vmf,
void *entry, sector_t sector)
{
struct radix_tree_root *page_tree = &mapping->page_tree;
int error = 0;
bool hole_fill = false;
void *new_entry;
pgoff_t index = vmf->pgoff;
if (vmf->flags & FAULT_FLAG_WRITE)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
/* Replacing hole page with block mapping? */
if (!radix_tree_exceptional_entry(entry)) {
hole_fill = true;
/*
* Unmap the page now before we remove it from page cache below.
* The page is locked so it cannot be faulted in again.
*/
unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
PAGE_SIZE, 0);
/* Preload before tree_lock is taken for the insert below */
error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
if (error)
return ERR_PTR(error);
}
spin_lock_irq(&mapping->tree_lock);
/* The new entry is created with its lock bit already set */
new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
RADIX_DAX_ENTRY_LOCK);
if (hole_fill) {
__delete_from_page_cache(entry, NULL);
/* Drop pagecache reference */
put_page(entry);
error = radix_tree_insert(page_tree, index, new_entry);
if (error) {
new_entry = ERR_PTR(error);
goto unlock;
}
mapping->nrexceptional++;
} else {
void **slot;
void *ret;
/* The existing exceptional entry is overwritten in its slot */
ret = __radix_tree_lookup(page_tree, index, NULL, &slot);
WARN_ON_ONCE(ret != entry);
radix_tree_replace_slot(slot, new_entry);
}
if (vmf->flags & FAULT_FLAG_WRITE)
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
unlock:
spin_unlock_irq(&mapping->tree_lock);
if (hole_fill) {
radix_tree_preload_end();
/*
* We don't need hole page anymore, it has been replaced with
* locked radix tree entry now.
*/
if (mapping->a_ops->freepage)
mapping->a_ops->freepage(entry);
unlock_page(entry);
put_page(entry);
}
return new_entry;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
jan kara | jan kara | 166 | 53.21% | 1 | 33.33% |
ross zwisler | ross zwisler | 140 | 44.87% | 1 | 33.33% |
dmitriy monakhov | dmitriy monakhov | 6 | 1.92% | 1 | 33.33% |
| Total | 312 | 100.00% | 3 | 100.00% |
static int dax_writeback_one(struct block_device *bdev,
struct address_space *mapping, pgoff_t index, void *entry)
{
struct radix_tree_root *page_tree = &mapping->page_tree;
int type = RADIX_DAX_TYPE(entry);
struct radix_tree_node *node;
struct blk_dax_ctl dax;
void **slot;
int ret = 0;
spin_lock_irq(&mapping->tree_lock);
/*
* Regular page slots are stabilized by the page lock even
* without the tree itself locked. These unlocked entries
* need verification under the tree lock.
*/
if (!__radix_tree_lookup(page_tree, index, &node, &slot))
goto unlock;
if (*slot != entry)
goto unlock;
/* another fsync thread may have already written back this entry */
if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
goto unlock;
if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)