cregit-Linux how code gets into the kernel

Release 4.11 fs/jffs2/gc.c

Directory: fs/jffs2
 * JFFS2 -- Journalling Flash File System, Version 2.
 * Copyright © 2001-2007 Red Hat, Inc.
 * Copyright © 2004-2010 David Woodhouse <>
 * Created by David Woodhouse <>
 * For licensing information, see the file 'LICENCE' in this directory.

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mtd/mtd.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/crc32.h>
#include <linux/compiler.h>
#include <linux/stat.h>
#include "nodelist.h"
#include "compr.h"

static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
					  struct jffs2_inode_cache *ic,
					  struct jffs2_raw_node_ref *raw);
static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
				      uint32_t start, uint32_t end);
static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
				       uint32_t start, uint32_t end);
static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);

/* Called with erase_completion_lock held */

static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c) { struct jffs2_eraseblock *ret; struct list_head *nextlist = NULL; int n = jiffies % 128; /* Pick an eraseblock to garbage collect next. This is where we'll put the clever wear-levelling algorithms. Eventually. */ /* We possibly want to favour the dirtier blocks more when the number of free blocks is low. */ again: if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) { jffs2_dbg(1, "Picking block from bad_used_list to GC next\n"); nextlist = &c->bad_used_list; } else if (n < 50 && !list_empty(&c->erasable_list)) { /* Note that most of them will have gone directly to be erased. So don't favour the erasable_list _too_ much. */ jffs2_dbg(1, "Picking block from erasable_list to GC next\n"); nextlist = &c->erasable_list; } else if (n < 110 && !list_empty(&c->very_dirty_list)) { /* Most of the time, pick one off the very_dirty list */ jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n"); nextlist = &c->very_dirty_list; } else if (n < 126 && !list_empty(&c->dirty_list)) { jffs2_dbg(1, "Picking block from dirty_list to GC next\n"); nextlist = &c->dirty_list; } else if (!list_empty(&c->clean_list)) { jffs2_dbg(1, "Picking block from clean_list to GC next\n"); nextlist = &c->clean_list; } else if (!list_empty(&c->dirty_list)) { jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n"); nextlist = &c->dirty_list; } else if (!list_empty(&c->very_dirty_list)) { jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"); nextlist = &c->very_dirty_list; } else if (!list_empty(&c->erasable_list)) { jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"); nextlist = &c->erasable_list; } else if (!list_empty(&c->erasable_pending_wbuf_list)) { /* There are blocks are wating for the wbuf sync */ jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"); spin_unlock(&c->erase_completion_lock); jffs2_flush_wbuf_pad(c); spin_lock(&c->erase_completion_lock); goto again; } else { /* Eep. All were empty */ jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"); return NULL; } ret = list_entry(nextlist->next, struct jffs2_eraseblock, list); list_del(&ret->list); c->gcblock = ret; ret->gc_node = ret->first_node; if (!ret->gc_node) { pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset); BUG(); } /* Have we accidentally picked a clean block with wasted space ? */ if (ret->wasted_size) { jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n", ret->wasted_size); ret->dirty_size += ret->wasted_size; c->wasted_size -= ret->wasted_size; c->dirty_size += ret->wasted_size; ret->wasted_size = 0; } return ret; }


Linus Torvalds19944.12%19.09%
David Woodhouse17137.92%436.36%
Artem B. Bityutskiy459.98%218.18%
Joe Perches357.76%327.27%
Thomas Gleixner10.22%19.09%

/* jffs2_garbage_collect_pass * Make a single attempt to progress GC. Move one node, and possibly * start erasing one eraseblock. */
int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) { struct jffs2_inode_info *f; struct jffs2_inode_cache *ic; struct jffs2_eraseblock *jeb; struct jffs2_raw_node_ref *raw; uint32_t gcblock_dirty; int ret = 0, inum, nlink; int xattr = 0; if (mutex_lock_interruptible(&c->alloc_sem)) return -EINTR; for (;;) { /* We can't start doing GC until we've finished checking the node CRCs etc. */ int bucket, want_ino; spin_lock(&c->erase_completion_lock); if (!c->unchecked_size) break; spin_unlock(&c->erase_completion_lock); if (!xattr) xattr = jffs2_verify_xattr(c); spin_lock(&c->inocache_lock); /* Instead of doing the inodes in numeric order, doing a lookup * in the hash for each possible number, just walk the hash * buckets of *existing* inodes. This means that we process * them out-of-order, but it can be a lot faster if there's * a sparse inode# space. Which there often is. */ want_ino = c->check_ino; for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) { for (ic = c->inocache_list[bucket]; ic; ic = ic->next) { if (ic->ino < want_ino) continue; if (ic->state != INO_STATE_CHECKEDABSENT && ic->state != INO_STATE_PRESENT) goto got_next; /* with inocache_lock held */ jffs2_dbg(1, "Skipping ino #%u already checked\n", ic->ino); } want_ino = 0; } /* Point c->check_ino past the end of the last bucket. */ c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) & ~c->inocache_hashsize) - 1; spin_unlock(&c->inocache_lock); pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n", c->unchecked_size); jffs2_dbg_dump_block_lists_nolock(c); mutex_unlock(&c->alloc_sem); return -ENOSPC; got_next: /* For next time round the loop, we want c->checked_ino to indicate * the *next* one we want to check. And since we're walking the * buckets rather than doing it sequentially, it's: */ c->check_ino = ic->ino + c->inocache_hashsize; if (!ic->pino_nlink) { jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n", ic->ino); spin_unlock(&c->inocache_lock); jffs2_xattr_delete_inode(c, ic); continue; } switch(ic->state) { case INO_STATE_CHECKEDABSENT: case INO_STATE_PRESENT: spin_unlock(&c->inocache_lock); continue; case INO_STATE_GC: case INO_STATE_CHECKING: pr_warn("Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state); spin_unlock(&c->inocache_lock); BUG(); case INO_STATE_READING: /* We need to wait for it to finish, lest we move on and trigger the BUG() above while we haven't yet finished checking all its nodes */ jffs2_dbg(1, "Waiting for ino #%u to finish reading\n", ic->ino); /* We need to come back again for the _same_ inode. We've made no progress in this case, but that should be OK */ c->check_ino = ic->ino; mutex_unlock(&c->alloc_sem); sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); return 0; default: BUG(); case INO_STATE_UNCHECKED: ; } ic->state = INO_STATE_CHECKING; spin_unlock(&c->inocache_lock); jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n", __func__, ic->ino); ret = jffs2_do_crccheck_inode(c, ic); if (ret) pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino); jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT); mutex_unlock(&c->alloc_sem); return ret; } /* If there are any blocks which need erasing, erase them now */ if (!list_empty(&c->erase_complete_list) || !list_empty(&c->erase_pending_list)) { spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__); if (jffs2_erase_pending_blocks(c, 1)) return 0; jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n"); mutex_lock(&c->alloc_sem); spin_lock(&c->erase_completion_lock); } /* First, work out which block we're garbage-collecting */ jeb = c->gcblock; if (!jeb) jeb = jffs2_find_gc_block(c); if (!jeb) { /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */ if (c->nr_erasing_blocks) { spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); return -EAGAIN; } jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n"); spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); return -EIO; } jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size); D1(if (c->nextblock) printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size)); if (!jeb->used_size) { mutex_unlock(&c->alloc_sem); goto eraseit; } raw = jeb->gc_node; gcblock_dirty = jeb->dirty_size; while(ref_obsolete(raw)) { jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)); raw = ref_next(raw); if (unlikely(!raw)) { pr_warn("eep. End of raw list while still supposedly nodes to GC\n"); pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size); jeb->gc_node = raw; spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); BUG(); } } jeb->gc_node = raw; jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n", ref_offset(raw)); if (!raw->next_in_ino) { /* Inode-less node. Clean marker, snapshot or something like that */ spin_unlock(&c->erase_completion_lock); if (ref_flags(raw) == REF_PRISTINE) { /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */ jffs2_garbage_collect_pristine(c, NULL, raw); } else { /* Just mark it obsolete */ jffs2_mark_node_obsolete(c, raw); } mutex_unlock(&c->alloc_sem); goto eraseit_lock; } ic = jffs2_raw_ref_to_ic(raw); #ifdef CONFIG_JFFS2_FS_XATTR /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr. * We can decide whether this node is inode or xattr by ic->class. */ if (ic->class == RAWNODE_CLASS_XATTR_DATUM || ic->class == RAWNODE_CLASS_XATTR_REF) { spin_unlock(&c->erase_completion_lock); if (ic->class == RAWNODE_CLASS_XATTR_DATUM) { ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw); } else { ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw); } goto test_gcnode; } #endif /* We need to hold the inocache. Either the erase_completion_lock or the inocache_lock are sufficient; we trade down since the inocache_lock causes less contention. */ spin_lock(&c->inocache_lock); spin_unlock(&c->erase_completion_lock); jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", __func__, jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino); /* Three possibilities: 1. Inode is already in-core. We must iget it and do proper updating to its fragtree, etc. 2. Inode is not in-core, node is REF_PRISTINE. We lock the inocache to prevent a read_inode(), copy the node intact. 3. Inode is not in-core, node is not pristine. We must iget() and take the slow path. */ switch(ic->state) { case INO_STATE_CHECKEDABSENT: /* It's been checked, but it's not currently in-core. We can just copy any pristine nodes, but have to prevent anyone else from doing read_inode() while we're at it, so we set the state accordingly */ if (ref_flags(raw) == REF_PRISTINE) ic->state = INO_STATE_GC; else { jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n", ic->ino); } break; case INO_STATE_PRESENT: /* It's in-core. GC must iget() it. */ break; case INO_STATE_UNCHECKED: case INO_STATE_CHECKING: case INO_STATE_GC: /* Should never happen. We should have finished checking by the time we actually start doing any GC, and since we're holding the alloc_sem, no other garbage collection can happen. */ pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n", ic->ino, ic->state); mutex_unlock(&c->alloc_sem); spin_unlock(&c->inocache_lock); BUG(); case INO_STATE_READING: /* Someone's currently trying to read it. We must wait for them to finish and then go through the full iget() route to do the GC. However, sometimes read_inode() needs to get the alloc_sem() (for marking nodes invalid) so we must drop the alloc_sem before sleeping. */ mutex_unlock(&c->alloc_sem); jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n", __func__, ic->ino, ic->state); sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); /* And because we dropped the alloc_sem we must start again from the beginning. Ponder chance of livelock here -- we're returning success without actually making any progress. Q: What are the chances that the inode is back in INO_STATE_READING again by the time we next enter this function? And that this happens enough times to cause a real delay? A: Small enough that I don't care :) */ return 0; } /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the node intact, and we don't have to muck about with the fragtree etc. because we know it's not in-core. If it _was_ in-core, we go through all the iget() crap anyway */ if (ic->state == INO_STATE_GC) { spin_unlock(&c->inocache_lock); ret = jffs2_garbage_collect_pristine(c, ic, raw); spin_lock(&c->inocache_lock); ic->state = INO_STATE_CHECKEDABSENT; wake_up(&c->inocache_wq); if (ret != -EBADFD) { spin_unlock(&c->inocache_lock); goto test_gcnode; } /* Fall through if it wanted us to, with inocache_lock held */ } /* Prevent the fairly unlikely race where the gcblock is entirely obsoleted by the final close of a file which had the only valid nodes in the block, followed by erasure, followed by freeing of the ic because the erased block(s) held _all_ the nodes of that inode.... never been seen but it's vaguely possible. */ inum = ic->ino; nlink = ic->pino_nlink; spin_unlock(&c->inocache_lock); f = jffs2_gc_fetch_inode(c, inum, !nlink); if (IS_ERR(f)) { ret = PTR_ERR(f); goto release_sem; } if (!f) { ret = 0; goto release_sem; } ret = jffs2_garbage_collect_live(c, jeb, raw, f); jffs2_gc_release_inode(c, f); test_gcnode: if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) { /* Eep. This really should never happen. GC is broken */ pr_err("Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node)); ret = -ENOSPC; } release_sem: mutex_unlock(&c->alloc_sem); eraseit_lock: /* If we've finished this block, start it erasing */ spin_lock(&c->erase_completion_lock); eraseit: if (c->gcblock && !c->gcblock->used_size) { jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset); /* We're GC'ing an empty block? */ list_add_tail(&c->gcblock->list, &c->erase_pending_list); c->gcblock = NULL; c->nr_erasing_blocks++; jffs2_garbage_collect_trigger(c); } spin_unlock(&c->erase_completion_lock); return ret; }


David Woodhouse97867.40%1659.26%
Linus Torvalds28919.92%13.70%
KaiGai Kohei1016.96%414.81%
Joe Perches594.07%311.11%
Joakim Tjernlund140.96%13.70%
Thomas Gleixner60.41%13.70%
Julia Cartwright40.28%13.70%

static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f) { struct jffs2_node_frag *frag; struct jffs2_full_dnode *fn = NULL; struct jffs2_full_dirent *fd; uint32_t start = 0, end = 0, nrfrags = 0; int ret = 0; mutex_lock(&f->sem); /* Now we have the lock for this inode. Check that it's still the one at the head of the list. */ spin_lock(&c->erase_completion_lock); if (c->gcblock != jeb) { spin_unlock(&c->erase_completion_lock); jffs2_dbg(1, "GC block is no longer gcblock. Restart\n"); goto upnout; } if (ref_obsolete(raw)) { spin_unlock(&c->erase_completion_lock); jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n"); /* They'll call again */ goto upnout; } spin_unlock(&c->erase_completion_lock); /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */ if (f->metadata && f->metadata->raw == raw) { fn = f->metadata; ret = jffs2_garbage_collect_metadata(c, jeb, f, fn); goto upnout; } /* FIXME. Read node and do lookup? */ for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) { if (frag->node && frag->node->raw == raw) { fn = frag->node; end = frag->ofs + frag->size; if (!nrfrags++) start = frag->ofs; if (nrfrags == frag->node->frags) break; /* We've found them all */ } } if (fn) { if (ref_flags(raw) == REF_PRISTINE) { ret = jffs2_garbage_collect_pristine(c, f->inocache, raw); if (!ret) { /* Urgh. Return it sensibly. */ frag->node->raw = f->inocache->nodes; } if (ret != -EBADFD) goto upnout; } /* We found a datanode. Do the GC */ if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) { /* It crosses a page boundary. Therefore, it must be a hole. */ ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end); } else { /* It could still be a hole. But we GC the page this way anyway */ ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end); } goto upnout; } /* Wasn't a dnode. Try dirent */ for (fd = f->dents; fd; fd=fd->next) { if (fd->raw == raw) break; } if (fd && fd->ino) { ret = jffs2_garbage_collect_dirent(c, jeb, f, fd); } else if (fd) { ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd); } else { pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n", ref_offset(raw), f->inocache->ino); if (ref_obsolete(raw)) { pr_warn("But it's obsolete so we don't mind too much\n"); } else { jffs2_dbg_dump_node(c, ref_offset(raw)); BUG(); } } upnout: mutex_unlock(&f->sem); return ret; }


David Woodhouse49595.74%450.00%
Artem B. Bityutskiy122.32%112.50%
Joe Perches81.55%225.00%
Kirill A. Shutemov20.39%112.50%

static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, struct jffs2_raw_node_ref *raw) { union jffs2_node_union *node; size_t retlen; int ret; uint32_t phys_ofs, alloclen; uint32_t crc, rawlen; int retried = 0; jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)); alloclen = rawlen = ref_totlen(c, c->gcblock, raw); /* Ask for a small amount of space (or the totlen if smaller) because we don't want to force wastage of the end of a block if splitting would work. */ if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN) alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN; ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen); /* 'rawlen' is not the exact summary size; it is only an upper estimation */ if (ret) return ret; if (alloclen < rawlen) { /* Doesn't fit untouched. We'll go the old route and split it */ return -EBADFD; } node = kmalloc(rawlen, GFP_KERNEL); if (!node) return -ENOMEM; ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node); if (!ret && retlen != rawlen) ret = -EIO; if (ret) goto out_node; crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4); if (je32_to_cpu(node->u.hdr_crc) != crc) { pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc); goto bail; } switch(je16_to_cpu(node->u.nodetype)) { case JFFS2_NODETYPE_INODE: crc = crc32(0, node, sizeof(node->i)-8); if (je32_to_cpu(node->i.node_crc) != crc) { pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->i.node_crc), crc); goto bail; } if (je32_to_cpu(node->i.dsize)) { crc = crc32(0, node->, je32_to_cpu(node->i.csize)); if (je32_to_cpu(node->i.data_crc) != crc) { pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->i.data_crc), crc); goto bail; } } break; case JFFS2_NODETYPE_DIRENT: crc = crc32(0, node, sizeof(node->d)-8); if (je32_to_cpu(node->d.node_crc) != crc) { pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->d.node_crc), crc); goto bail; } if (strnlen(node->, node->d.nsize) != node->d.nsize