mtd/fs/jffs2 gc.c,1.123,1.124 nodelist.h,1.109,1.110

Sun Nov 2 14:28:45 EST 2003

Update of /home/cvs/mtd/fs/jffs2
In directory phoenix.infradead.org:/tmp/cvs-serv19651

Modified Files:
	gc.c nodelist.h 
Log Message:
Fix GC races.


Index: gc.c
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/gc.c,v
retrieving revision 1.123
retrieving revision 1.124
diff -u -r1.123 -r1.124

--- gc.c	2 Nov 2003 12:30:16 -0000	1.123
+++ gc.c	2 Nov 2003 19:28:42 -0000	1.124
@@ -115,7 +115,6 @@
 	struct jffs2_inode_cache *ic;
 	struct jffs2_eraseblock *jeb;
 	struct jffs2_raw_node_ref *raw;
-	uint32_t inum;
 	int ret = 0;
 
 	if (down_interruptible(&c->alloc_sem))
@@ -223,17 +222,21 @@
 			
 	while(ref_obsolete(raw)) {
 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
-		jeb->gc_node = raw = raw->next_phys;
-		if (!raw) {
+		raw = raw->next_phys;
+		if (unlikely(!raw)) {
 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", 
 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
+			jeb->gc_node = raw;
 			spin_unlock(&c->erase_completion_lock);
 			up(&c->alloc_sem);
 			BUG();
 		}
 	}
+	jeb->gc_node = raw;
+
 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
+
 	if (!raw->next_in_ino) {
 		/* Inode-less node. Clean marker, snapshot or something like that */
 		/* FIXME: If it's something that needs to be copied, including something
@@ -243,13 +246,15 @@
 		up(&c->alloc_sem);
 		goto eraseit_lock;
 	}
-						     
-	inum = jffs2_raw_ref_to_inum(raw);
-	D1(printk(KERN_DEBUG "Inode number is #%u\n", inum));
+
+	ic = jffs2_raw_ref_to_ic(raw);
+
+	/* We need to hold the inocache */
+	spin_lock(&c->inocache_lock);
 
 	spin_unlock(&c->erase_completion_lock);
 
-	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), inum));
+	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
 
 	/* Three possibilities:
 	   1. Inode is already in-core. We must iget it and do proper
@@ -259,11 +264,6 @@
 	   3. Inode is not in-core, node is not pristine. We must iget()
 	      and take the slow path.
 	*/
-	spin_lock(&c->inocache_lock);
-	ic = jffs2_get_ino_cache(c, inum);
-
-	/* This should never fail unless I'm particularly stupid.
-	   So we don't check before dereferencing it */
 
 	switch(ic->state) {
 	case INO_STATE_CHECKEDABSENT:
@@ -275,7 +275,7 @@
 			ic->state = INO_STATE_GC;
 		else {
 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n", 
-				  inum));
+				  ic->ino));
 		}
 		break;
 
@@ -306,7 +306,7 @@
 
 		up(&c->alloc_sem);
 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
-			  inum, ic->state));
+			  ic->ino, ic->state));
 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 		/* And because we dropped the alloc_sem we must start again from the 
 		   beginning. Ponder chance of livelock here -- we're returning success
@@ -321,14 +321,14 @@
 		return 0;
 	}
 
-	spin_unlock(&c->inocache_lock);
-
 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 	   node intact, and we don't have to muck about with the fragtree etc. 
 	   because we know it's not in-core. If it _was_ in-core, we go through
 	   all the iget() crap anyway */
 
 	if (ic->state == INO_STATE_GC) {
+		spin_unlock(&c->inocache_lock);
+
 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 
@@ -338,6 +338,13 @@
 		/* Fall through if it wanted us to */
 	}
 
+	/* FIXME: We still have the inocache_lock held. This is ugly.
+	   It's done to prevent the fairly unlikely race where the
+	   gcblock is entirely obsoleted by the final close of a file
+	   which had the only valid nodes in the block, followed by
+	   erasure, followed by freeing of the ic because the erased
+	   block(s) held _all_ the nodes of that inode.... never been
+	   seen but it's vaguely possible. */
 	ret = jffs2_garbage_collect_live(c, jeb, raw, ic);
 
  release_sem:
@@ -373,6 +380,10 @@
 	int ret = 0;
 
 	if (!ic->nlink) {
+		int inum = ic->ino;
+
+		spin_unlock(&c->inocache_lock);
+
 		/* The inode has zero nlink but its nodes weren't yet marked
 		   obsolete. This has to be because we're still waiting for 
 		   the final (close() and) iput() to happen.
@@ -387,12 +398,17 @@
 		   holding the alloc_sem, and jffs2_do_unlink() would also
 		   need that while decrementing nlink on any inode.
 		*/
-		inode = ilookup(OFNI_BS_2SFFJ(c), ic->ino);
+		inode = ilookup(OFNI_BS_2SFFJ(c), inum);
 		if (!inode) {
 			D1(printk(KERN_DEBUG "ilookup() failed for ino #%u; inode is probably deleted.\n",
-				  ic->ino));
+				  inum));
 
 			spin_lock(c->inocache_lock);
+			ic = jffs2_get_ino_cache(c, inum);
+			if (!ic) {
+				D1(printk(KERN_DEBUG "Inode cache for ino #%u is gone.\n", inum));
+				return 0;
+			}
 			if (ic->state != INO_STATE_CHECKEDABSENT) {
 				/* Wait for progress. Don't just loop */
 				D1(printk(KERN_DEBUG "Waiting for ino #%u in state %d\n",
@@ -404,6 +420,8 @@
 			return 0;
 		}
 	} else {
+		spin_unlock(&c->inocache_lock);
+
 		/* Inode has links to it still; they're not going away because
 		   jffs2_do_unlink() would need the alloc_sem and we have it.
 		   Just iget() it, and if read_inode() is necessary that's OK.
@@ -413,7 +431,8 @@
 			return -ENOMEM;
 	}
 	if (is_bad_inode(inode)) {
-		printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d, state %d\n", ic->ino, ic->nlink, ic->state);
+		printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d, state %d\n",
+		       ic->ino, ic->nlink, ic->state);
 		/* NB. This will happen again. We need to do something appropriate here. */
 		ret = -EIO;
 		goto put_out;
@@ -425,11 +444,21 @@
 	/* Now we have the lock for this inode. Check that it's still the one at the head
 	   of the list. */
 
+	spin_lock(&c->erase_completion_lock);
+
+	if (c->gcblock != jeb) {
+		spin_unlock(&erase_completion_lock);
+		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
+		goto upnout;
+	}
 	if (ref_obsolete(raw)) {
+		spin_unlock(&erase_completion_lock);
 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
 		/* They'll call again */
 		goto upnout;
 	}
+	spin_unlock(&erase_completion_lock);
+
 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 	if (f->metadata && f->metadata->raw == raw) {
 		fn = f->metadata;
@@ -442,54 +471,6 @@
 		if (frag->node && frag->node->raw == raw) {
 			fn = frag->node;
 			end = frag->ofs + frag->size;
-#if 0 /* Checked in readinode.c now anyway. */
-			if (!nrfrags && ref_flags(fn->raw) == REF_PRISTINE) {
-				int bitched = 0;
-
-				if (fn->frags > 1) {
-					printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had %d frags. Tell dwmw2\n", ref_offset(raw), fn->frags);
-					bitched = 1;
-				}
-				/* A hole node which isn't multi-page should be garbage-collected
-				   and merged anyway, so we just check for the frag size here,
-				   rather than mucking around with actually reading the node
-				   and checking the compression type, which is the real way
-				   to tell a hole node. */
-				if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag) && frag_prev(frag)->size < PAGE_CACHE_SIZE) {
-					printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2\n",
-					       ref_offset(raw));
-					bitched = 1;
-				}
-
-				if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag) && frag_next(frag)->size < PAGE_CACHE_SIZE) {
-					printk(KERN_WARNING "REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2\n",
-					       ref_offset(raw), frag->ofs, frag->ofs+frag->size);
-					bitched = 1;
-				}
-				if (bitched) {
-					struct jffs2_node_frag *thisfrag;
-
-					printk(KERN_WARNING "Inode is #%u, i_size %zd\n", ic->ino, inode->i_size);	
-					thisfrag = frag_first(&f->fragtree);
-					while (thisfrag) {
-						if (!thisfrag->node) {
-							printk("Frag @0x%x-0x%x; node-less hole\n",
-							       thisfrag->ofs, thisfrag->size + thisfrag->ofs);
-						} else if (!thisfrag->node->raw) {
-							printk("Frag @0x%x-0x%x; raw-less hole\n",
-							       thisfrag->ofs, thisfrag->size + thisfrag->ofs);
-						} else {
-							printk("Frag @0x%x-0x%x; raw at 0x%08x(%d) (0x%x-0x%x)\n",
-							       thisfrag->ofs, thisfrag->size + thisfrag->ofs,
-							       ref_offset(thisfrag->node->raw), ref_flags(thisfrag->node->raw),
-							       thisfrag->node->ofs, thisfrag->node->ofs+thisfrag->node->size);
-						}
-						thisfrag = frag_next(thisfrag);
-					}
-					mark_ref_normal(raw);
-				}
-			}
-#endif
 			if (!nrfrags++)
 				start = frag->ofs;
 			if (nrfrags == frag->node->frags)

Index: nodelist.h
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/nodelist.h,v
retrieving revision 1.109
retrieving revision 1.110
diff -u -r1.109 -r1.110
--- nodelist.h	28 Oct 2003 17:21:01 -0000	1.109
+++ nodelist.h	2 Nov 2003 19:28:42 -0000	1.110
@@ -330,13 +330,13 @@
 
 #define PAD(x) (((x)+3)&~3)
 
-static inline int jffs2_raw_ref_to_inum(struct jffs2_raw_node_ref *raw)
+static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_node_ref *raw)
 {
 	while(raw->next_in_ino) {
 		raw = raw->next_in_ino;
 	}
 
-	return ((struct jffs2_inode_cache *)raw)->ino;
+	return ((struct jffs2_inode_cache *)raw);
 }
 
 static inline struct jffs2_node_frag *frag_first(struct rb_root *root)