cregit-Linux how code gets into the kernel

Release 4.10 fs/ocfs2/aops.c

Directory: fs/ocfs2
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <asm/byteorder.h>
#include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "aops.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "inode.h"
#include "journal.h"
#include "suballoc.h"
#include "super.h"
#include "symlink.h"
#include "refcounttree.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"
#include "dir.h"
#include "namei.h"
#include "sysfile.h"

/*
 * get_block callback for symlinks whose target lives in allocated blocks.
 * Fast symlinks must never be passed in; that case trips the BUG_ON below.
 *
 * @inode:     symlink inode being mapped
 * @iblock:    logical block inside the symlink data
 * @bh_result: buffer head that receives the mapping (and, for a new inode,
 *             possibly a copy of the data from the buffer cache)
 * @create:    only forwarded to the tracepoint
 *
 * Returns 0 on success.  Returns -ENOMEM when @iblock lies beyond the
 * allocated clusters or when sb_getblk() fails, and -EIO for every other
 * failure (offset past PATH_MAX, inode block read error, kmap failure).
 */
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	trace_ocfs2_symlink_get_block(
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			(unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_inode_block(inode, &bh);
	if (status < 0) {
		/* status is only logged; the caller sees the default -EIO */
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		err = -ENOMEM;
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	/* We don't use the page cache to create symlink data, so if
	 * need be, copy it over from the buffer cache. */
	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			err = -ENOMEM;
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		/* we haven't locked out transactions, so a commit
		 * could've happened. Since we've got a reference on
		 * the bh, even if it commits while we're doing the
		 * copy, the data is still good. */
		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page);
			if (!kaddr) {
				/*
				 * buffer_cache_bh is still referenced here;
				 * it is dropped at bail together with bh.
				 */
				mlog(ML_ERROR, "couldn't kmap!\n");
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr);
			set_buffer_uptodate(bh_result);
		}
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	/*
	 * Single release point for both buffer heads so that no error path
	 * can leak the reference taken by sb_getblk().  Either pointer may
	 * still be NULL when we get here.
	 */
	brelse(buffer_cache_bh);
	brelse(bh);

	return err;
}


mark fashehmark fasheh38694.61%125.00%
tao matao ma112.70%125.00%
rui xiangrui xiang102.45%125.00%
joel beckerjoel becker10.25%125.00%

/*
 * Block-mapping callback: look up the physical block behind logical block
 * @iblock of @inode and describe it in @bh_result.
 *
 * Symlinks are handed to ocfs2_symlink_get_block().  For all other inodes
 * the extent map is consulted and the length stored in @bh_result->b_size
 * is limited to the number of blocks the caller's buffer head covers.
 *
 * Returns 0 on success; any negative error is reported as -EIO.
 */
int ocfs2_get_block(struct inode *inode, sector_t iblock,
		    struct buffer_head *bh_result, int create)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	/* number of blocks the caller's buffer head has room for */
	u64 wanted = bh_result->b_size >> inode->i_blkbits;
	u64 p_blkno, count, eof_blk;
	unsigned int ext_flags;
	int err = 0;

	trace_ocfs2_get_block((unsigned long long)OCFS2_I(inode)->ip_blkno,
			      (unsigned long long)iblock, bh_result, create);

	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
		mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
		     inode, inode->i_ino);

	if (S_ISLNK(inode->i_mode)) {
		/* this always does I/O for some reason. */
		err = ocfs2_symlink_get_block(inode, iblock, bh_result,
					      create);
		goto out;
	}

	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
					  &ext_flags);
	if (err) {
		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
		     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
		     (unsigned long long)p_blkno);
		goto out;
	}

	/* never report more blocks than the caller asked about */
	if (count > wanted)
		count = wanted;

	/*
	 * ocfs2 never allocates in this function - the only time we
	 * need to use BH_New is when we're extending i_size on a file
	 * system which doesn't support holes, in which case BH_New
	 * allows __block_write_begin() to zero.
	 *
	 * Seeing an unmapped block on a sparse file system means a
	 * truncate raced us and removed the cluster.  Clear the buffer's
	 * dirty and uptodate bits so the buffer code treats it as a hole.
	 */
	if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) {
		clear_buffer_dirty(bh_result);
		clear_buffer_uptodate(bh_result);
		goto out;
	}

	/* An unwritten extent is left unmapped so it is zeroed like a hole. */
	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
		map_bh(bh_result, inode->i_sb, p_blkno);

	bh_result->b_size = count << inode->i_blkbits;

	/* Without sparse support an unmapped block is an error. */
	if (p_blkno == 0 && !ocfs2_sparse_alloc(osb)) {
		err = -EIO;
		mlog(ML_ERROR,
		     "iblock = %llu p_blkno = %llu blkno=(%llu)\n",
		     (unsigned long long)iblock,
		     (unsigned long long)p_blkno,
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		mlog(ML_ERROR, "Size %llu, clusters %u\n",
		     (unsigned long long)i_size_read(inode),
		     OCFS2_I(inode)->ip_clusters);
		dump_stack();
		goto out;
	}

	eof_blk = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));

	trace_ocfs2_get_block_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
				  (unsigned long long)eof_blk);

	/* blocks at or beyond i_size are flagged new for a creating caller */
	if (create && iblock >= eof_blk)
		set_buffer_new(bh_result);

out:
	return (err < 0) ? -EIO : err;
}


mark fashehmark fasheh37489.90%550.00%
tao matao ma215.05%110.00%
coly licoly li163.85%110.00%
wengang wangwengang wang30.72%110.00%
christoph hellwigchristoph hellwig10.24%110.00%
joel beckerjoel becker10.24%110.00%

int ocfs2_read_inline_data(struct inode *inode, struct page *page, struct buffer_head *di_bh) { void *kaddr; loff_t size; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); return -EROFS; } size = i_size_read(inode); if (size > PAGE_SIZE || size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) { ocfs2_error(inode->i_sb, "Inode %llu has with inline data has bad size: %Lu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)size); return -EROFS; } kaddr = kmap_atomic(page); if (size) memcpy(kaddr, di->id2.i_data.id_data, size); /* Clear the remaining part of the page */ memset(kaddr + size, 0, PAGE_SIZE - size); flush_dcache_page(page); kunmap_atomic(kaddr); SetPageUptodate(page); return 0; }


mark fashehmark fasheh18593.43%120.00%
jan karajan kara63.03%120.00%
tiger yangtiger yang31.52%120.00%
joe perchesjoe perches21.01%120.00%
kirill a. shutemovkirill a. shutemov21.01%120.00%

/*
 * Read a page of an inline-data inode: fetch the inode block and copy its
 * inline data into @page.  The page must be locked on entry and is always
 * unlocked before returning, on success and on failure alike.
 *
 * Returns 0 on success or the error from reading the inode block or the
 * inline data.
 */
static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
{
	struct buffer_head *di_bh = NULL;
	int ret;

	BUG_ON(!PageLocked(page));
	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
	} else {
		ret = ocfs2_read_inline_data(inode, page, di_bh);
	}

	unlock_page(page);
	brelse(di_bh);

	return ret;
}


mark fashehmark fasheh9696.00%125.00%
joel beckerjoel becker22.00%250.00%
julia lawalljulia lawall22.00%125.00%

static int ocfs2_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); loff_t start = (loff_t)page->index << PAGE_SHIFT; int ret, unlock = 1; trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, (page ? page->index : 0)); ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); if (ret != 0) { if (ret == AOP_TRUNCATED_PAGE) unlock = 0; mlog_errno(ret); goto out; } if (down_read_trylock(&oi->ip_alloc_sem) == 0) { /* * Unlock the page and cycle ip_alloc_sem so that we don't * busyloop waiting for ip_alloc_sem to unlock */ ret = AOP_TRUNCATED_PAGE; unlock_page(page); unlock = 0; down_read(&oi->ip_alloc_sem); up_read(&oi->ip_alloc_sem); goto out_inode_unlock; } /* * i_size might have just been updated as we grabed the meta lock. We * might now be discovering a truncate that hit on another node. * block_read_full_page->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers * (generic_file_read, vm_ops->fault) are clever enough to check i_size * and notice that the page they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? */ if (start >= i_size_read(inode)) { zero_user(page, 0, PAGE_SIZE); SetPageUptodate(page); ret = 0; goto out_alloc; } if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) ret = ocfs2_readpage_inline(inode, page); else ret = block_read_full_page(page, ocfs2_get_block); unlock = 0; out_alloc: up_read(&OCFS2_I(inode)->ip_alloc_sem); out_inode_unlock: ocfs2_inode_unlock(inode, 0); out: if (unlock) unlock_page(page); return ret; }


mark fashehmark fasheh22685.61%444.44%
jan karajan kara269.85%111.11%
tao matao ma93.41%111.11%
christoph lameterchristoph lameter10.38%111.11%
kirill a. shutemovkirill a. shutemov10.38%111.11%
nick pigginnick piggin10.38%111.11%

/* * This is used only for read-ahead. Failures or difficult to handle * situations are safe to ignore. * * Right now, we don't bother with BH_Boundary - in-inode extent lists * are quite large (243 extents on 4k blocks), so most inodes don't * grow out to a tree. If need be, detecting boundary extents could * trivially be added in a future version of ocfs2_get_block(). */
static int ocfs2_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { int ret, err = -EIO; struct inode *inode = mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); loff_t start; struct page *last; /* * Use the nonblocking flag for the dlm code to avoid page * lock inversion, but don't bother with retrying. */ ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); if (ret) return err; if (down_read_trylock(&oi->ip_alloc_sem) == 0) { ocfs2_inode_unlock(inode, 0); return err; } /* * Don't bother with inline-data. There isn't anything * to read-ahead in that case anyway... */ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) goto out_unlock; /* * Check whether a remote node truncated this file - we just * drop out in that case as it's not worth handling here. */ last = list_entry(pages->prev, struct page, lru); start = (loff_t)last->index << PAGE_SHIFT; if (start >= i_size_read(inode)) goto out_unlock; err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); out_unlock: up_read(&oi->ip_alloc_sem); ocfs2_inode_unlock(inode, 0); return err; }


mark fashehmark fasheh18699.47%150.00%
kirill a. shutemovkirill a. shutemov10.53%150.00%

/* Note: Because we don't support holes, our allocation has * already happened (allocation writes zeros to the file data) * so we don't have to worry about ordered writes in * ocfs2_writepage. * * ->writepage is called during the process of invalidating the page cache * during blocked lock processing. It can't block on any cluster locks * to during block mapping. It's relying on the fact that the block * mapping can't have disappeared under the dirty pages that it is * being asked to write back. */
static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) { trace_ocfs2_writepage( (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno, page->index); return block_write_full_page(page, ocfs2_get_block, wbc); }


mark fashehmark fasheh3061.22%150.00%
tao matao ma1938.78%150.00%

/*
 * Taken from ext3.  The full-blown functionality isn't strictly needed
 * yet, but copying the whole thing avoids introducing our own bugs and
 * makes it easy to pick up their fixes when they happen.  --Mark
 *
 * Walk the circular buffer list starting at @head and call @fn on every
 * buffer that overlaps the byte range [@from, @to) of the page.  For a
 * buffer outside the range, *@partial is set to 1 when that buffer is not
 * uptodate (if @partial is non-NULL).  The walk stops at the first non-zero
 * return from @fn, which is also the return value; otherwise 0.
 */
int walk_page_buffers(	handle_t *handle,
			struct buffer_head *head,
			unsigned from,
			unsigned to,
			int *partial,
			int (*fn)(	handle_t *handle,
					struct buffer_head *bh))
{
	unsigned blocksize = head->b_size;
	struct buffer_head *bh = head;
	unsigned block_start = 0;
	int ret = 0;

	while (ret == 0 && (bh != head || !block_start)) {
		/* grab the successor before @fn gets to touch bh */
		struct buffer_head *next = bh->b_this_page;
		unsigned block_end = block_start + blocksize;

		if (block_end > from && block_start < to) {
			ret = (*fn)(handle, bh);
		} else if (partial && !buffer_uptodate(bh)) {
			*partial = 1;
		}

		block_start = block_end;
		bh = next;
	}

	return ret;
}


mark fashehmark fasheh166100.00%1100.00%

/*
 * ->bmap: translate logical @block of the inode behind @mapping into a
 * physical block number.
 *
 * Returns the physical block, or 0 on any error, for refcounted inodes,
 * and when nothing is mapped (inline-data inodes are never looked up and
 * therefore report 0).
 */
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	u64 p_blkno = 0;
	int err = 0;

	trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno,
			 (unsigned long long)block);

	/*
	 * The swap code (ab-)uses ->bmap to get a block mapping and then
	 * bypasses the file system for actual I/O.  We really can't allow
	 * that on refcounted inodes, so we have to skip out here.  And yes,
	 * 0 is the magic code for a bmap error..
	 */
	if (ocfs2_is_refcount_inode(inode))
		return 0;

	/*
	 * Journal system files need no locking, since they aren't accessed
	 * concurrently from multiple nodes.
	 */
	if (!INODE_JOURNAL(inode)) {
		err = ocfs2_inode_lock(inode, NULL, 0);
		if (err) {
			if (err != -ENOENT)
				mlog_errno(err);
			return 0;
		}
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
	}

	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
		err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
						  NULL);

	if (!INODE_JOURNAL(inode)) {
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ocfs2_inode_unlock(inode, 0);
	}

	if (err) {
		mlog(ML_ERROR, "get_blocks() failed, block = %llu\n",
		     (unsigned long long)block);
		mlog_errno(err);
		return 0;
	}

	return p_blkno;
}


mark fashehmark fasheh19989.64%466.67%
tao matao ma125.41%116.67%
darrick j. wongdarrick j. wong114.95%116.67%

/*
 * ->releasepage: try to drop the buffers attached to @page.
 *
 * Returns 0 when the page has no buffers, otherwise whatever
 * try_to_free_buffers() reports.  @wait is not used.
 */
static int ocfs2_releasepage(struct page *page, gfp_t wait)
{
	return page_has_buffers(page) ? try_to_free_buffers(page) : 0;
}


ryan dingryan ding2580.65%150.00%
mark fashehmark fasheh619.35%150.00%

/*
 * Compute the byte range within a page that is covered by cluster @cpos.
 *
 * When a page is larger than a cluster, the range is the slot of that
 * cluster inside the page; otherwise it is the whole page [0, PAGE_SIZE).
 * Either of @start / @end may be NULL if the caller does not need it.
 */
static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
					    u32 cpos,
					    unsigned int *start,
					    unsigned int *end)
{
	unsigned int first = 0;
	unsigned int last = PAGE_SIZE;

	if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) {
		/* number of clusters that fit in one page */
		unsigned int per_page;

		per_page = 1 << (PAGE_SHIFT - osb->s_clustersize_bits);

		first = (cpos % per_page) << osb->s_clustersize_bits;
		last = first + osb->s_clustersize;
	}

	BUG_ON(first > PAGE_SIZE);
	BUG_ON(last > PAGE_SIZE);

	if (start)
		*start = first;
	if (end)
		*end = last;
}


ryan dingryan ding7160.68%111.11%
joseph qijoseph qi2319.66%222.22%
mark fashehmark fasheh1311.11%333.33%
tao matao ma43.42%111.11%
wang weiweiwang weiwei32.56%111.11%
kirill a. shutemovkirill a. shutemov32.56%111.11%

/*
 * Zero the parts of @page that belong to cluster @cpos, except for the
 * byte range [@from, @to), which is left alone.
 *
 * If pagesize > clustersize, nothing outside the cluster's boundaries
 * within the page is touched.
 *
 * from == to == 0 is code for "zero the entire cluster region".
 */
static void ocfs2_clear_page_regions(struct page *page,
				     struct ocfs2_super *osb, u32 cpos,
				     unsigned from, unsigned to)
{
	unsigned int c_start, c_end;
	void *addr;

	ocfs2_figure_cluster_boundaries(osb, cpos, &c_start, &c_end);

	addr = kmap_atomic(page);

	if (!from && !to) {
		/* no region to preserve: wipe the whole cluster range */
		memset(addr + c_start, 0, c_end - c_start);
	} else {
		/* zero what lies before and after the preserved region */
		if (from > c_start)
			memset(addr + c_start, 0, from - c_start);
		if (to < c_end)
			memset(addr + to, 0, c_end - to);
	}

	kunmap_atomic(addr);
}


ryan dingryan ding8569.67%120.00%
joseph qijoseph qi3226.23%240.00%
yiwen jiangyiwen jiang32.46%120.00%
wang weiweiwang weiwei21.64%120.00%

/*
 * Nonsparse file systems fully allocate before we get to the write code.
 * That keeps ocfs2_write() from tagging the write as an allocating one,
 * so ocfs2_map_page_blocks() might try to read in the blocks at the tail
 * of our file.  Avoid reading them by testing i_size against each block
 * offset.
 *
 * Returns 1 if the block at @block_start within @page should be read
 * (always on sparse file systems, otherwise only when it starts below
 * i_size), 0 if not.
 */
static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
				 unsigned int block_start)
{
	u64 offset = page_offset(page) + block_start;

	return ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) ||
	       i_size_read(inode) > offset;
}


ryan dingryan ding4575.00%120.00%
wang weiweiwang weiwei58.33%120.00%
joseph qijoseph qi58.33%120.00%
mark fashehmark fasheh58.33%240.00%

/*
 * Some of this taken from __block_write_begin(). We already have our
 * mapping by now though, and the entire write will be allocating or
 * it won't, so not much need to use BH_New.
 *
 * This will also skip zeroing, which is handled externally.
 *
 * Walks every buffer of @page.  Buffers overlapping the byte range
 * [@from, @to) are mapped to consecutive physical blocks starting at
 * *@p_blkno (which is advanced by one per such buffer) and, where they are
 * only partially covered by the range and not uptodate, read from disk.
 * @new marks the write as allocating: in-range buffers get BH_New.
 *
 * Returns 0 on success or -EIO if one of the issued reads failed.
 */
int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
			  struct inode *inode, unsigned int from,
			  unsigned int to, int new)
{
	int ret = 0;
	/* wait[] collects the buffers we issued reads for */
	struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
	unsigned int block_end, block_start;
	unsigned int bsize = 1 << inode->i_blkbits;

	if (!page_has_buffers(page))
		create_empty_buffers(page, bsize, 0);

	head = page_buffers(page);
	/* one pass over the circular buffer list; block_start == 0 only on entry */
	for (bh = head, block_start = 0; bh != head || !block_start;
	     bh = bh->b_this_page, block_start += bsize) {
		block_end = block_start + bsize;

		clear_buffer_new(bh);

		/*
		 * Ignore blocks outside of our i/o range -
		 * they may belong to unallocated clusters.
		 */
		if (block_start >= to || block_end <= from) {
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			continue;
		}

		/*
		 * For an allocating write with cluster size >= page
		 * size, we always write the entire page.
		 */
		if (new)
			set_buffer_new(bh);

		if (!buffer_mapped(bh)) {
			map_bh(bh, inode->i_sb, *p_blkno);
			clean_bdev_bh_alias(bh);
		}

		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
		} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
			   !buffer_new(bh) &&
			   ocfs2_should_read_blk(inode, page, block_start) &&
			   (block_start < from || block_end > to)) {
			/* only partially overwritten: need the old contents */
			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
			*wait_bh++=bh;
		}

		/* the next in-range buffer maps to the next physical block */
		*p_blkno = *p_blkno + 1;
	}

	/*
	 * If we issued read requests - let them complete.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			ret = -EIO;
	}

	/* the cleanup below applies only to a failed allocating write */
	if (ret == 0 || !new)
		return ret;

	/*
	 * If we get -EIO above, zero out any newly allocated blocks
	 * to avoid exposing stale data.
	 */
	bh = head;
	block_start = 0;
	do {
		block_end = block_start + bsize;
		if (block_end <= from)
			goto next_bh;
		if (block_start >= to)
			break;

		zero_user(page, block_start, bh->b_size);
		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);

next_bh:
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}


ryan dingryan ding29472.24%18.33%
joseph qijoseph qi5613.76%325.00%
mark fashehmark fasheh327.86%325.00%
joel beckerjoel becker112.70%18.33%
christoph hellwigchristoph hellwig51.23%18.33%
tristan yetristan ye51.23%18.33%
michael christiemichael christie30.74%18.33%
jan karajan kara10.25%18.33%

#if (PAGE_SIZE >= OCFS2_MAX_CLUSTERSIZE) #define OCFS2_MAX_CTXT_PAGES 1 #else #define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_SIZE) #endif #define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_SIZE / OCFS2_MIN_CLUSTERSIZE) struct ocfs2_unwritten_extent { struct list_head ue_node; struct list_head ue_ip_node; u32 ue_cpos; u32 ue_phys; }; /* * Describe the state of a single cluster to be written to. */ struct ocfs2_write_cluster_desc { u32 c_cpos; u32 c_phys; /* * Give this a unique field because c_phys eventually gets * filled. */ unsigned c_new; unsigned c_clear_unwritten; unsigned c_needs_zero; }; struct ocfs2_write_ctxt { /* Logical cluster position / len of write */ u32 w_cpos; u32 w_clen; /* First cluster allocated in a nonsparse extend */ u32 w_first_new_cpos; /* T