mtd/fs/jffs2 gc.c,1.123,1.124 nodelist.h,1.109,1.110
David Woodhouse
dwmw2 at infradead.org
Sun Nov 2 14:28:45 EST 2003
Update of /home/cvs/mtd/fs/jffs2
In directory phoenix.infradead.org:/tmp/cvs-serv19651
Modified Files:
gc.c nodelist.h
Log Message:
Fix GC races.
Index: gc.c
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/gc.c,v
retrieving revision 1.123
retrieving revision 1.124
diff -u -r1.123 -r1.124
--- gc.c 2 Nov 2003 12:30:16 -0000 1.123
+++ gc.c 2 Nov 2003 19:28:42 -0000 1.124
@@ -115,7 +115,6 @@
struct jffs2_inode_cache *ic;
struct jffs2_eraseblock *jeb;
struct jffs2_raw_node_ref *raw;
- uint32_t inum;
int ret = 0;
if (down_interruptible(&c->alloc_sem))
@@ -223,17 +222,21 @@
while(ref_obsolete(raw)) {
D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
- jeb->gc_node = raw = raw->next_phys;
- if (!raw) {
+ raw = raw->next_phys;
+ if (unlikely(!raw)) {
printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
+ jeb->gc_node = raw;
spin_unlock(&c->erase_completion_lock);
up(&c->alloc_sem);
BUG();
}
}
+ jeb->gc_node = raw;
+
D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
+
if (!raw->next_in_ino) {
/* Inode-less node. Clean marker, snapshot or something like that */
/* FIXME: If it's something that needs to be copied, including something
@@ -243,13 +246,15 @@
up(&c->alloc_sem);
goto eraseit_lock;
}
-
- inum = jffs2_raw_ref_to_inum(raw);
- D1(printk(KERN_DEBUG "Inode number is #%u\n", inum));
+
+ ic = jffs2_raw_ref_to_ic(raw);
+
+ /* We need to hold the inocache */
+ spin_lock(&c->inocache_lock);
spin_unlock(&c->erase_completion_lock);
- D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), inum));
+ D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
/* Three possibilities:
1. Inode is already in-core. We must iget it and do proper
@@ -259,11 +264,6 @@
3. Inode is not in-core, node is not pristine. We must iget()
and take the slow path.
*/
- spin_lock(&c->inocache_lock);
- ic = jffs2_get_ino_cache(c, inum);
-
- /* This should never fail unless I'm particularly stupid.
- So we don't check before dereferencing it */
switch(ic->state) {
case INO_STATE_CHECKEDABSENT:
@@ -275,7 +275,7 @@
ic->state = INO_STATE_GC;
else {
D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
- inum));
+ ic->ino));
}
break;
@@ -306,7 +306,7 @@
up(&c->alloc_sem);
D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
- inum, ic->state));
+ ic->ino, ic->state));
sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
/* And because we dropped the alloc_sem we must start again from the
beginning. Ponder chance of livelock here -- we're returning success
@@ -321,14 +321,14 @@
return 0;
}
- spin_unlock(&c->inocache_lock);
-
/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
node intact, and we don't have to muck about with the fragtree etc.
because we know it's not in-core. If it _was_ in-core, we go through
all the iget() crap anyway */
if (ic->state == INO_STATE_GC) {
+ spin_unlock(&c->inocache_lock);
+
ret = jffs2_garbage_collect_pristine(c, ic, raw);
jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
@@ -338,6 +338,13 @@
/* Fall through if it wanted us to */
}
+ /* FIXME: We still have the inocache_lock held. This is ugly.
+ It's done to prevent the fairly unlikely race where the
+ gcblock is entirely obsoleted by the final close of a file
+ which had the only valid nodes in the block, followed by
+ erasure, followed by freeing of the ic because the erased
+ block(s) held _all_ the nodes of that inode.... never been
+ seen but it's vaguely possible. */
ret = jffs2_garbage_collect_live(c, jeb, raw, ic);
release_sem:
@@ -373,6 +380,10 @@
int ret = 0;
if (!ic->nlink) {
+ int inum = ic->ino;
+
+ spin_unlock(&c->inocache_lock);
+
/* The inode has zero nlink but its nodes weren't yet marked
obsolete. This has to be because we're still waiting for
the final (close() and) iput() to happen.
@@ -387,12 +398,17 @@
holding the alloc_sem, and jffs2_do_unlink() would also
need that while decrementing nlink on any inode.
*/
- inode = ilookup(OFNI_BS_2SFFJ(c), ic->ino);
+ inode = ilookup(OFNI_BS_2SFFJ(c), inum);
if (!inode) {
D1(printk(KERN_DEBUG "ilookup() failed for ino #%u; inode is probably deleted.\n",
- ic->ino));
+ inum));
spin_lock(c->inocache_lock);
+ ic = jffs2_get_ino_cache(c, inum);
+ if (!ic) {
+ D1(printk(KERN_DEBUG "Inode cache for ino #%u is gone.\n", inum));
+ return 0;
+ }
if (ic->state != INO_STATE_CHECKEDABSENT) {
/* Wait for progress. Don't just loop */
D1(printk(KERN_DEBUG "Waiting for ino #%u in state %d\n",
@@ -404,6 +420,8 @@
return 0;
}
} else {
+ spin_unlock(&c->inocache_lock);
+
/* Inode has links to it still; they're not going away because
jffs2_do_unlink() would need the alloc_sem and we have it.
Just iget() it, and if read_inode() is necessary that's OK.
@@ -413,7 +431,8 @@
return -ENOMEM;
}
if (is_bad_inode(inode)) {
- printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d, state %d\n", ic->ino, ic->nlink, ic->state);
+ printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d, state %d\n",
+ ic->ino, ic->nlink, ic->state);
/* NB. This will happen again. We need to do something appropriate here. */
ret = -EIO;
goto put_out;
@@ -425,11 +444,21 @@
/* Now we have the lock for this inode. Check that it's still the one at the head
of the list. */
+ spin_lock(&c->erase_completion_lock);
+
+ if (c->gcblock != jeb) {
+ spin_unlock(&erase_completion_lock);
+ D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
+ goto upnout;
+ }
if (ref_obsolete(raw)) {
+ spin_unlock(&erase_completion_lock);
D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
/* They'll call again */
goto upnout;
}
+ spin_unlock(&erase_completion_lock);
+
/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
if (f->metadata && f->metadata->raw == raw) {
fn = f->metadata;
@@ -442,54 +471,6 @@
if (frag->node && frag->node->raw == raw) {
fn = frag->node;
end = frag->ofs + frag->size;
-#if 0 /* Checked in readinode.c now anyway. */
- if (!nrfrags && ref_flags(fn->raw) == REF_PRISTINE) {
- int bitched = 0;
-
- if (fn->frags > 1) {
- printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had %d frags. Tell dwmw2\n", ref_offset(raw), fn->frags);
- bitched = 1;
- }
- /* A hole node which isn't multi-page should be garbage-collected
- and merged anyway, so we just check for the frag size here,
- rather than mucking around with actually reading the node
- and checking the compression type, which is the real way
- to tell a hole node. */
- if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag) && frag_prev(frag)->size < PAGE_CACHE_SIZE) {
- printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2\n",
- ref_offset(raw));
- bitched = 1;
- }
-
- if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag) && frag_next(frag)->size < PAGE_CACHE_SIZE) {
- printk(KERN_WARNING "REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2\n",
- ref_offset(raw), frag->ofs, frag->ofs+frag->size);
- bitched = 1;
- }
- if (bitched) {
- struct jffs2_node_frag *thisfrag;
-
- printk(KERN_WARNING "Inode is #%u, i_size %zd\n", ic->ino, inode->i_size);
- thisfrag = frag_first(&f->fragtree);
- while (thisfrag) {
- if (!thisfrag->node) {
- printk("Frag @0x%x-0x%x; node-less hole\n",
- thisfrag->ofs, thisfrag->size + thisfrag->ofs);
- } else if (!thisfrag->node->raw) {
- printk("Frag @0x%x-0x%x; raw-less hole\n",
- thisfrag->ofs, thisfrag->size + thisfrag->ofs);
- } else {
- printk("Frag @0x%x-0x%x; raw at 0x%08x(%d) (0x%x-0x%x)\n",
- thisfrag->ofs, thisfrag->size + thisfrag->ofs,
- ref_offset(thisfrag->node->raw), ref_flags(thisfrag->node->raw),
- thisfrag->node->ofs, thisfrag->node->ofs+thisfrag->node->size);
- }
- thisfrag = frag_next(thisfrag);
- }
- mark_ref_normal(raw);
- }
- }
-#endif
if (!nrfrags++)
start = frag->ofs;
if (nrfrags == frag->node->frags)
Index: nodelist.h
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/nodelist.h,v
retrieving revision 1.109
retrieving revision 1.110
diff -u -r1.109 -r1.110
--- nodelist.h 28 Oct 2003 17:21:01 -0000 1.109
+++ nodelist.h 2 Nov 2003 19:28:42 -0000 1.110
@@ -330,13 +330,13 @@
#define PAD(x) (((x)+3)&~3)
-static inline int jffs2_raw_ref_to_inum(struct jffs2_raw_node_ref *raw)
+static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_node_ref *raw)
{
while(raw->next_in_ino) {
raw = raw->next_in_ino;
}
- return ((struct jffs2_inode_cache *)raw)->ino;
+ return ((struct jffs2_inode_cache *)raw);
}
static inline struct jffs2_node_frag *frag_first(struct rb_root *root)
More information about the linux-mtd-cvs
mailing list