mtd/fs/jffs2 nodelist.c, 1.103, 1.104 nodelist.h, 1.136, 1.137 readinode.c, 1.134, 1.135

Artem Bityuckiy dedekind at infradead.org
Mon Aug 1 08:05:22 EDT 2005


Update of /home/cvs/mtd/fs/jffs2
In directory phoenix.infradead.org:/tmp/cvs-serv31135

Modified Files:
	nodelist.c nodelist.h readinode.c 
Log Message:
[JFFS2] major change of the fragtree build algorithm.

Instead of building fragtree starting from node with the smallest version
number, start from the highest. This helps to avoid reading and checking
obsolete nodes.



Index: nodelist.c
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/nodelist.c,v
retrieving revision 1.103
retrieving revision 1.104
diff -u -r1.103 -r1.104
--- nodelist.c	31 Jul 2005 08:20:44 -0000	1.103
+++ nodelist.c	1 Aug 2005 12:05:19 -0000	1.104
@@ -59,7 +59,7 @@
 
 	/* We know frag->ofs <= size. That's what lookup does for us */
 	if (frag && frag->ofs != size) {
-		if (frag->ofs+frag->size >= size) {
+		if (frag->ofs+frag->size > size) {
 			JFFS2_DBG_FRAGTREE2("truncating frag 0x%08x-0x%08x\n", frag->ofs, frag->ofs+frag->size);
 			frag->size = size - frag->ofs;
 		}
@@ -73,6 +73,20 @@
 		jffs2_obsolete_node_frag(c, frag);
 		frag = next;
 	}
+
+	if (size == 0)
+		return;
+
+	/* 
+	 * If the last fragment starts at the RAM page boundary, it is
+	 * REF_PRISTINE irrespective of its size.
+	 */
+	frag = frag_last(list);
+	if ((frag->ofs & (PAGE_CACHE_SIZE - 1)) == 0) {
+		JFFS2_DBG_FRAGTREE2("marking the last fragment 0x%08x-0x%08x REF_PRISTINE.\n",
+			frag->ofs, frag->ofs + frag->size); 
+		frag->node->raw->flash_offset = ref_offset(frag->node->raw) | REF_PRISTINE;
+	}
 }
 
 void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this)
@@ -120,14 +134,82 @@
 	rb_link_node(&newfrag->rb, &base->rb, link);
 }
 
+/*
+ * Allocate and initialize a new fragment. Returns NULL if the allocation fails.
+ */
+static inline struct jffs2_node_frag * new_fragment(struct jffs2_full_dnode *fn, uint32_t ofs, uint32_t size)
+{
+	struct jffs2_node_frag *newfrag;
+	
+	newfrag = jffs2_alloc_node_frag();
+	if (likely(newfrag)) {
+		newfrag->ofs = ofs;
+		newfrag->size = size;
+		newfrag->node = fn;
+	} else {
+		JFFS2_ERROR("cannot allocate a jffs2_node_frag object\n");
+	}
+
+	return newfrag;
+}
+
+/*
+ * Called when no existing fragment overlaps the new one. Inserts a hole before the
+ * new fragment if needed, then links the new fragment. Frees newfrag on -ENOMEM.
+ */
+static int no_overlapping_node(struct jffs2_sb_info *c, struct rb_root *root,
+		 	       struct jffs2_node_frag *newfrag,
+			       struct jffs2_node_frag *this, uint32_t lastend)
+{
+	if (lastend < newfrag->node->ofs) {
+		/* put a hole in before the new fragment */
+		struct jffs2_node_frag *holefrag;
+
+		holefrag= new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
+		if (unlikely(!holefrag)) {
+			jffs2_free_node_frag(newfrag);
+			return -ENOMEM;
+		}
+
+		if (this) {
+			/* By definition, the 'this' node has no right-hand child, 
+			   because there are no frags with offset greater than it.
+			   So that's where we want to put the hole */
+			JFFS2_DBG_FRAGTREE2("add hole frag %u-%u on the right of the new frag.\n",
+				holefrag->ofs, holefrag->ofs + holefrag->size);
+			rb_link_node(&holefrag->rb, &this->rb, &this->rb.rb_right);
+		} else {
+			JFFS2_DBG_FRAGTREE2("Add hole frag %u-%u to the root of the tree.\n",
+				holefrag->ofs, holefrag->ofs + holefrag->size);
+			rb_link_node(&holefrag->rb, NULL, &root->rb_node);
+		}
+		rb_insert_color(&holefrag->rb, root);
+		this = holefrag;
+	}
+	
+	if (this) {
+		/* By definition, the 'this' node has no right-hand child, 
+		   because there are no frags with offset greater than it.
+		   So that's where we want to put new fragment */
+		JFFS2_DBG_FRAGTREE2("add the new node at the right\n");
+		rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);			
+	} else {
+		JFFS2_DBG_FRAGTREE2("insert the new node at the root of the tree\n");
+		rb_link_node(&newfrag->rb, NULL, &root->rb_node);
+	}
+	rb_insert_color(&newfrag->rb, root);
+
+	return 0;
+}
+
 /* Doesn't set inode->i_size */
-static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *list, struct jffs2_node_frag *newfrag)
+static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *root, struct jffs2_node_frag *newfrag)
 {
 	struct jffs2_node_frag *this;
 	uint32_t lastend;
 
 	/* Skip all the nodes which are completed before this one starts */
-	this = jffs2_lookup_node_frag(list, newfrag->node->ofs);
+	this = jffs2_lookup_node_frag(root, newfrag->node->ofs);
 
 	if (this) {
 		JFFS2_DBG_FRAGTREE2("lookup gave frag 0x%04x-0x%04x; phys 0x%08x (*%p)\n",
@@ -138,7 +220,7 @@
 		lastend = 0;
 	}
 			  
-	/* See if we ran off the end of the list */
+	/* See if we ran off the end of the fragtree */
 	if (lastend <= newfrag->ofs) {
 		/* We did */
 
@@ -152,45 +234,16 @@
 			mark_ref_normal(newfrag->node->raw);
 		}
 
-		if (lastend < newfrag->node->ofs) {
-			/* ... and we need to put a hole in before the new node */
-			struct jffs2_node_frag *holefrag = jffs2_alloc_node_frag();
-			if (!holefrag) {
-				jffs2_free_node_frag(newfrag);
-				return -ENOMEM;
-			}
-			holefrag->ofs = lastend;
-			holefrag->size = newfrag->node->ofs - lastend;
-			holefrag->node = NULL;
-			if (this) {
-				/* By definition, the 'this' node has no right-hand child, 
-				   because there are no frags with offset greater than it.
-				   So that's where we want to put the hole */
-				JFFS2_DBG_FRAGTREE2("adding hole frag (%p) on right of node at (%p)\n", holefrag, this);
-				rb_link_node(&holefrag->rb, &this->rb, &this->rb.rb_right);
-			} else {
-				JFFS2_DBG_FRAGTREE2("adding hole frag (%p) at root of tree\n", holefrag);
-				rb_link_node(&holefrag->rb, NULL, &list->rb_node);
-			}
-			rb_insert_color(&holefrag->rb, list);
-			this = holefrag;
-		}
-		if (this) {
-			/* By definition, the 'this' node has no right-hand child, 
-			   because there are no frags with offset greater than it.
-			   So that's where we want to put new fragment */
-			JFFS2_DBG_FRAGTREE2("adding new frag (%p) on right of node at (%p)\n", newfrag, this);
-			rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);			
-		} else {
-			JFFS2_DBG_FRAGTREE2("adding new frag (%p) at root of tree\n", newfrag);
-			rb_link_node(&newfrag->rb, NULL, &list->rb_node);
-		}
-		rb_insert_color(&newfrag->rb, list);
-		return 0;
+		return no_overlapping_node(c, root, newfrag, this, lastend);
 	}
 
-	JFFS2_DBG_FRAGTREE2("dealing with frag 0x%04x-0x%04x; phys 0x%08x (*%p)\n", 
-		  this->ofs, this->ofs+this->size, this->node?(ref_offset(this->node->raw)):0xffffffff, this);
+	if (this->node)
+		JFFS2_DBG_FRAGTREE2("dealing with frag %u-%u, phys %#08x(%d).\n",
+		this->ofs, this->ofs + this->size,
+		ref_offset(this->node->raw), ref_flags(this->node->raw));
+	else
+		JFFS2_DBG_FRAGTREE2("dealing with hole frag %u-%u.\n",
+		this->ofs, this->ofs + this->size);
 
 	/* OK. 'this' is pointing at the first frag that newfrag->ofs at least partially obsoletes,
 	 * - i.e. newfrag->ofs < this->ofs+this->size && newfrag->ofs >= this->ofs  
@@ -206,11 +259,8 @@
 
 		if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
 			/* The new node splits 'this' frag into two */
-			struct jffs2_node_frag *newfrag2 = jffs2_alloc_node_frag();
-			if (!newfrag2) {
-				jffs2_free_node_frag(newfrag);
-				return -ENOMEM;
-			}
+			struct jffs2_node_frag *newfrag2;
+
 			if (this->node)
 				JFFS2_DBG_FRAGTREE2("split old frag 0x%04x-0x%04x, phys 0x%08x\n",
 					this->ofs, this->ofs+this->size, ref_offset(this->node->raw));
@@ -219,9 +269,10 @@
 					this->ofs, this->ofs+this->size, ref_offset(this->node->raw));
 			
 			/* New second frag pointing to this's node */
-			newfrag2->ofs = newfrag->ofs + newfrag->size;
-			newfrag2->size = (this->ofs+this->size) - newfrag2->ofs;
-			newfrag2->node = this->node;
+			newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
+						this->ofs + this->size - newfrag->ofs - newfrag->size);
+			if (unlikely(!newfrag2))
+				return -ENOMEM;
 			if (this->node)
 				this->node->frags++;
 
@@ -235,10 +286,10 @@
 			   'this' to insert newfrag, and a tree insert
 			   from newfrag to insert newfrag2. */
 			jffs2_fragtree_insert(newfrag, this);
-			rb_insert_color(&newfrag->rb, list);
+			rb_insert_color(&newfrag->rb, root);
 			
 			jffs2_fragtree_insert(newfrag2, newfrag);
-			rb_insert_color(&newfrag2->rb, list);
+			rb_insert_color(&newfrag2->rb, root);
 			
 			return 0;
 		}
@@ -247,14 +298,14 @@
 
 		/* Again, we know it lives down here in the tree */
 		jffs2_fragtree_insert(newfrag, this);
-		rb_insert_color(&newfrag->rb, list);
+		rb_insert_color(&newfrag->rb, root);
 	} else {
 		/* New frag starts at the same point as 'this' used to. Replace 
 		   it in the tree without doing a delete and insertion */
 		JFFS2_DBG_FRAGTREE2("inserting newfrag (*%p),%d-%d in before 'this' (*%p),%d-%d\n",
 			  newfrag, newfrag->ofs, newfrag->ofs+newfrag->size, this, this->ofs, this->ofs+this->size);
 	
-		rb_replace_node(&this->rb, &newfrag->rb, list);
+		rb_replace_node(&this->rb, &newfrag->rb, root);
 		
 		if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
 			JFFS2_DBG_FRAGTREE2("obsoleting node frag %p (%x-%x)\n", this, this->ofs, this->ofs+this->size);
@@ -264,7 +315,7 @@
 			this->size -= newfrag->size;
 
 			jffs2_fragtree_insert(this, newfrag);
-			rb_insert_color(&this->rb, list);
+			rb_insert_color(&this->rb, root);
 			return 0;
 		}
 	}
@@ -275,15 +326,15 @@
 		/* 'this' frag is obsoleted completely. */
 		JFFS2_DBG_FRAGTREE2("obsoleting node frag %p (%x-%x) and removing from tree\n",
 			this, this->ofs, this->ofs+this->size);
-		rb_erase(&this->rb, list);
+		rb_erase(&this->rb, root);
 		jffs2_obsolete_node_frag(c, this);
 	}
 	/* Now we're pointing at the first frag which isn't totally obsoleted by 
 	   the new frag */
 
-	if (!this || newfrag->ofs + newfrag->size == this->ofs) {
+	if (!this || newfrag->ofs + newfrag->size == this->ofs)
 		return 0;
-	}
+
 	/* Still some overlap but we don't need to move it in the tree */
 	this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
 	this->ofs = newfrag->ofs + newfrag->size;
@@ -296,8 +347,9 @@
 	return 0;
 }
 
-/* Given an inode, probably with existing list of fragments, add the new node
- * to the fragment list.
+/* 
+ * Given an inode, probably with existing tree of fragments, add the new node
+ * to the fragment tree.
  */
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 {
@@ -307,18 +359,14 @@
 	if (unlikely(!fn->size))
 		return 0;
 
-	newfrag = jffs2_alloc_node_frag();
+	newfrag = new_fragment(fn, fn->ofs, fn->size);
 	if (unlikely(!newfrag))
 		return -ENOMEM;
+	newfrag->node->frags = 1;
 
 	JFFS2_DBG_FRAGTREE("adding node %#04x-%#04x @0x%08x on flash, newfrag *%p\n",
 		  fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag);
 	
-	newfrag->ofs = fn->ofs;
-	newfrag->size = fn->size;
-	newfrag->node = fn;
-	newfrag->node->frags = 1;
-
 	ret = jffs2_add_frag_to_fragtree(c, &f->fragtree, newfrag);
 	if (unlikely(ret))
 		return ret;
@@ -344,10 +392,465 @@
 		}
 	}
 	jffs2_dbg_fragtree_paranoia_check_nolock(f);
-	jffs2_dbg_dump_fragtree_nolock(f);
+
 	return 0;
 }
 
+/*
+ * Check the data CRC of the node, continuing from tn->partial_crc.
+ *
+ * Returns: 0 if the data CRC is correct (space accounting is then adjusted);
+ * 	    1 - if incorrect;
+ *	    negative error code if an error occurred.
+ */
+static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
+{
+	struct jffs2_raw_node_ref *ref = tn->fn->raw;
+	int err = 0, pointed = 0;
+	struct jffs2_eraseblock *jeb;
+	unsigned char *buffer;
+	uint32_t crc, ofs, retlen, len;
+
+	BUG_ON(tn->csize == 0);
+
+	/* Bytes already folded into tn->partial_crc. NOTE(review): mixing PAGE_CACHE_SIZE and wbuf_pagesize looks suspect - confirm */
+	ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode);
+	len = ofs - (ofs & (PAGE_CACHE_SIZE - 1));
+	len = c->wbuf_pagesize - len;
+
+	if (len >= tn->csize) {
+		JFFS2_DBG_READINODE("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n",
+			ref_offset(ref), tn->csize, ofs);
+		goto adj_acc;
+	}
+	
+	ofs += len;
+	len = tn->csize - len;
+	
+	JFFS2_DBG_READINODE("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n",
+		ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len);
+	
+#ifndef __ECOS
+	/* TODO: instead, encapsulate point() stuff to jffs2_flash_read(),
+	 * adding a jffs2_flash_read_end() interface. */
+	if (c->mtd->point) {
+		err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
+		if (!err && retlen < len) { /* only 'len' bytes were requested, so compare against len, not csize */
+			JFFS2_WARNING("MTD point returned len too short: %u instead of %u.\n", retlen, len);
+			c->mtd->unpoint(c->mtd, buffer, ofs, len);
+		} else if (err)
+			JFFS2_WARNING("MTD point failed: error code %d.\n", err);
+		else
+			pointed = 1; /* successfully pointed to device */
+	}
+#endif
+	
+	if (!pointed) {
+		buffer = kmalloc(len, GFP_KERNEL);
+		if (unlikely(!buffer))
+			return -ENOMEM;
+			
+		/* TODO: this is very frequent pattern, make it a separate
+		 * routine */
+		err = jffs2_flash_read(c, ofs, len, &retlen, buffer);
+		if (err) {
+			JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err);
+			goto free_out;
+		}
+			
+		if (retlen != len) {
+			JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ofs, retlen, len);
+			err = -EIO;
+			goto free_out;
+		}
+	}
+
+	/* Continue calculating CRC */
+	crc = crc32(tn->partial_crc, buffer, len);
+	if(!pointed)
+		kfree(buffer);
+#ifndef __ECOS
+	else
+		c->mtd->unpoint(c->mtd, buffer, ofs, len);
+#endif
+
+	if (crc != tn->data_crc) {
+		JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
+			ofs, tn->data_crc, crc);
+		return 1;
+	}
+
+adj_acc:
+	jeb = &c->blocks[ref->flash_offset / c->sector_size];
+	len = ref_totlen(c, jeb, ref);
+
+	/* 
+	 * Mark the node as having been checked and fix the
+	 * accounting accordingly.
+	 */
+	spin_lock(&c->erase_completion_lock);
+	jeb->used_size += len;
+	jeb->unchecked_size -= len;
+	c->used_size += len;
+	c->unchecked_size -= len;
+	spin_unlock(&c->erase_completion_lock);
+
+	return 0;
+
+free_out:
+	if(!pointed)
+		kfree(buffer);
+#ifndef __ECOS
+	else
+		c->mtd->unpoint(c->mtd, buffer, ofs, len);
+#endif
+	return err;
+}
+
+/*
+ * Helper for jffs2_add_older_frag_to_fragtree(): checks the data CRC of a node
+ * that is still REF_UNCHECKED. Returns 0 if the CRC is correct or the node is
+ * not REF_UNCHECKED, 1 if the CRC is wrong (the node is marked obsolete), < 0 on error.
+ */
+static inline int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
+{
+	int ret;
+	
+	BUG_ON(ref_obsolete(tn->fn->raw));
+
+	/* We only check the data CRC of unchecked nodes */
+	if (ref_flags(tn->fn->raw) != REF_UNCHECKED)
+		return 0;
+	
+	JFFS2_DBG_FRAGTREE2("check node %u-%u, phys offs %#08x.\n",
+		tn->fn->ofs, tn->fn->ofs + tn->fn->size,
+		ref_offset(tn->fn->raw));
+
+	ret = check_node_data(c, tn);
+	if (unlikely(ret < 0)) {
+		JFFS2_ERROR("check_node_data() returned error: %d.\n",
+			ret);
+	} else if (unlikely(ret > 0)) {
+		JFFS2_DBG_FRAGTREE2("CRC error, mark it obsolete.\n");
+		jffs2_mark_node_obsolete(c, tn->fn->raw);
+	}
+
+	return ret;
+}
+
+/* 
+ * Helper function for jffs2_add_older_frag_to_fragtree().
+ *
+ * Called when the new fragment being inserted lies within a hole fragment.
+ * Returns 0 on success; on -ENOMEM the new fragment has been freed.
+ */
+static int split_hole(struct jffs2_sb_info *c, struct rb_root *root,
+		      struct jffs2_node_frag *newfrag, struct jffs2_node_frag *hole)
+{
+	JFFS2_DBG_FRAGTREE2("fragment %#04x-%#04x splits the hole %#04x-%#04x\n",
+		newfrag->ofs, newfrag->ofs + newfrag->size, hole->ofs, hole->ofs + hole->size);
+
+	if (hole->ofs == newfrag->ofs) {
+		/* 
+		 * Well, the new fragment actually starts at the same offset as
+		 * the hole.
+		 */
+		if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) {
+			/* 
+			 * We replace the overlapped left part of the hole by
+			 * the new node.
+			 */
+			
+			JFFS2_DBG_FRAGTREE2("insert fragment %#04x-%#04x and cut the left part of the hole\n",
+				newfrag->ofs, newfrag->ofs + newfrag->size);
+			rb_replace_node(&hole->rb, &newfrag->rb, root);
+			
+			hole->ofs += newfrag->size;
+			hole->size -= newfrag->size;
+			
+			/* 
+			 * We know that 'hole' should be the right hand
+			 * fragment.
+			 */
+			jffs2_fragtree_insert(hole, newfrag);
+			rb_insert_color(&hole->rb, root);
+		} else {
+			/* 
+			 * Ah, the new fragment is of the same size as the hole.
+			 * Replace the hole by it.
+			 */
+			JFFS2_DBG_FRAGTREE2("insert fragment %#04x-%#04x and overwrite hole\n",
+				newfrag->ofs, newfrag->ofs + newfrag->size);
+			rb_replace_node(&hole->rb, &newfrag->rb, root);
+			jffs2_free_node_frag(hole);
+		}
+	} else {
+		/* The new fragment leaves some hole space on the left */
+		
+		struct jffs2_node_frag * newfrag2 = NULL;
+
+		if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) {
+			/* The new frag also leaves some hole space on the right */
+			newfrag2 = new_fragment(NULL, newfrag->ofs +
+				newfrag->size, hole->ofs + hole->size
+				- newfrag->ofs - newfrag->size);
+			if (unlikely(!newfrag2)) {
+				jffs2_free_node_frag(newfrag);
+				return -ENOMEM;
+			}
+		}
+
+		hole->size = newfrag->ofs - hole->ofs;
+		JFFS2_DBG_FRAGTREE2("left the hole %#04x-%#04x at the left and inserd fragment %#04x-%#04x\n",
+			hole->ofs, hole->ofs + hole->size, newfrag->ofs, newfrag->ofs + newfrag->size);
+
+		jffs2_fragtree_insert(newfrag, hole);
+		rb_insert_color(&newfrag->rb, root);
+		
+		if (newfrag2) {
+			JFFS2_DBG_FRAGTREE2("left the hole %#04x-%#04x at the right\n",
+				newfrag2->ofs, newfrag2->ofs + newfrag2->size);
+			jffs2_fragtree_insert(newfrag2, newfrag);
+			rb_insert_color(&newfrag2->rb, root);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * This function is used when we build an inode. It expects the nodes to be passed
+ * in decreasing version order. The whole point of this is to improve the
+ * inode checking on NAND: we check the nodes' data CRC only when they are not
+ * obsoleted. Previously, the add_frag_to_fragtree() function was used,
+ * nodes were passed to it in increasing version order and the CRCs of all
+ * nodes were checked.
+ *
+ * Note: tn->fn->size shouldn't be zero.
+ *
+ * Returns 0 if the node was inserted
+ *         1 if it wasn't inserted (it is obsolete or its data CRC is wrong)
+ *         < 0 if an error occurred
+ */
+int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
+				     struct jffs2_tmp_dnode_info *tn)
+{
+	struct jffs2_node_frag *this, *newfrag;
+	uint32_t lastend;
+	struct jffs2_full_dnode *fn = tn->fn;
+	struct rb_root *root = &f->fragtree;
+	uint32_t fn_size = fn->size, fn_ofs = fn->ofs;
+	int err, checked = 0;
+	int ref_flag;
+
+	JFFS2_DBG_FRAGTREE("insert fragment %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size);
+
+	/* Skip all the nodes which are completed before this one starts */
+	this = jffs2_lookup_node_frag(root, fn_ofs);
+	if (this)
+		JFFS2_DBG_FRAGTREE2("'this' found %#04x-%#04x (%s)\n", this->ofs, this->ofs + this->size, this->node ? "data" : "hole");
+
+	if (this)
+		lastend = this->ofs + this->size;
+	else
+		lastend = 0;
+	
+	/* Detect the preliminary type of node */
+	if (fn->size >= PAGE_CACHE_SIZE)
+		ref_flag = REF_PRISTINE;
+	else
+		ref_flag = REF_NORMAL;
+	
+	/* See if we ran off the end of the root */
+	if (lastend <= fn_ofs) {
+		/* We did */
+		
+		/* 
+		 * We are going to insert the new node into the
+		 * fragment tree, so check it.
+		 */
+		err = check_node(c, f, tn);
+		if (err != 0)
+			return err;
+
+		fn->frags = 1;
+
+		newfrag = new_fragment(fn, fn_ofs, fn_size);
+		if (unlikely(!newfrag))
+			return -ENOMEM;
+
+		err = no_overlapping_node(c, root, newfrag, this, lastend);
+		if (unlikely(err != 0)) {
+			/* no_overlapping_node() already freed newfrag; freeing it again here would be a double free */
+			return err;
+		}
+
+		goto out_ok;
+	}
+
+	fn->frags = 0;
+
+	while (1) {
+		/* 
+		 * Here we have:
+		 * fn_ofs < this->ofs + this->size && fn_ofs >= this->ofs.
+		 * 
+		 * Remember, 'this' has higher version, any non-hole node
+		 * which is already in the fragtree is newer than the newly
+		 * inserted.
+		 */
+		if (!this->node) {
+			/* 
+			 * 'this' is the hole fragment, so at least the
+			 * beginning of the new fragment is valid.
+			 */
+			
+			/* 
+			 * We are going to insert the new node into the
+			 * fragment tree, so check it.
+			 */
+			if (!checked) {
+				err = check_node(c, f, tn);
+				if (unlikely(err != 0))
+					return err;
+				checked = 1;
+			}
+			
+			if (this->ofs + this->size >= fn_ofs + fn_size) {
+				/* We split the hole into two parts */
+
+				fn->frags += 1;
+				newfrag = new_fragment(fn, fn_ofs, fn_size);
+				if (unlikely(!newfrag))
+					return -ENOMEM;
+
+				err = split_hole(c, root, newfrag, this);
+				if (unlikely(err))
+					return err;
+				goto out_ok;
+			}
+
+			/* 
+			 * The beginning of the new fragment is valid since it
+			 * overlaps the hole node.
+			 */
+
+			ref_flag = REF_NORMAL;
+
+			fn->frags += 1;
+			newfrag = new_fragment(fn, fn_ofs,
+					this->ofs + this->size - fn_ofs);
+			if (unlikely(!newfrag))
+				return -ENOMEM;
+			
+			if (fn_ofs == this->ofs) {
+				/* 
+				 * The new node starts at the same offset as
+				 * the hole and supersedes the hole.
+				 */
+				JFFS2_DBG_FRAGTREE2("add the new fragment instead of hole %#04x-%#04x, refcnt %d\n",
+					fn_ofs, fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
+
+				rb_replace_node(&this->rb, &newfrag->rb, root);
+				jffs2_free_node_frag(this);
+			} else {
+				/* 
+				 * The hole becomes shorter as its right part
+				 * is superseded by the new fragment.
+				 */
+				JFFS2_DBG_FRAGTREE2("reduce size of hole %#04x-%#04x to %#04x-%#04x\n",
+					this->ofs, this->ofs + this->size, this->ofs, this->ofs + this->size - newfrag->size);
+				
+				JFFS2_DBG_FRAGTREE2("add new fragment %#04x-%#04x, refcnt %d\n", fn_ofs,
+					fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
+	
+				this->size -= newfrag->size;
+				jffs2_fragtree_insert(newfrag, this);
+				rb_insert_color(&newfrag->rb, root);
+			}
+			
+			fn_ofs += newfrag->size;
+			fn_size -= newfrag->size;
+			this = rb_entry(rb_next(&newfrag->rb),
+					struct jffs2_node_frag, rb);
+
+			JFFS2_DBG_FRAGTREE2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
+				this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
+		}
+
+		/* 
+		 * 'This' node is not the hole so it obsoletes the new fragment
+		 * either fully or partially.
+		 */
+		if (this->ofs + this->size >= fn_ofs + fn_size) {
+			/* The new node is obsolete, drop it */
+			if (fn->frags == 0) {
+				JFFS2_DBG_FRAGTREE2("%#04x-%#04x is obsolete, mark it obsolete\n", fn_ofs, fn_ofs + fn_size);
+				ref_flag = REF_OBSOLETE;
+			}
+			goto out_ok;
+		} else {
+			struct jffs2_node_frag *new_this;
+			
+			/* 'This' node obsoletes the beginning of the new node */
+			JFFS2_DBG_FRAGTREE2("the beginning %#04x-%#04x is obsolete\n", fn_ofs, this->ofs + this->size);
+
+			ref_flag = REF_NORMAL;
+			
+			fn_size -= this->ofs + this->size - fn_ofs;
+			fn_ofs = this->ofs + this->size;
+			JFFS2_DBG_FRAGTREE2("now considering %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size);
+			
+			new_this = rb_entry(rb_next(&this->rb), struct jffs2_node_frag, rb);
+			if (!new_this) {
+				/* 
+				 * There is no next fragment. Add the rest of
+				 * the new node as the right-hand child.
+				 */
+				if (!checked) {
+					err = check_node(c, f, tn);
+					if (unlikely(err != 0))
+						return err;
+					checked = 1;
+				}
+				
+				fn->frags += 1;
+				newfrag = new_fragment(fn, fn_ofs, fn_size);
+				if (unlikely(!newfrag))
+					return -ENOMEM;
+
+				JFFS2_DBG_FRAGTREE2("there are no more fragments, insert %#04x-%#04x\n",
+					newfrag->ofs, newfrag->ofs + newfrag->size);
+				rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);			
+				rb_insert_color(&newfrag->rb, root);
+				goto out_ok;
+			} else {
+				this = new_this;
+				JFFS2_DBG_FRAGTREE2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
+					this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
+			}
+		}
+	}
+
+out_ok:
+	BUG_ON(fn->size < PAGE_CACHE_SIZE && ref_flag == REF_PRISTINE);
+
+	if (ref_flag == REF_OBSOLETE) {
+		JFFS2_DBG_FRAGTREE2("the node is obsolete now\n");
+		/* jffs2_mark_node_obsolete() will adjust space accounting */
+		jffs2_mark_node_obsolete(c, fn->raw);
+		return 1;
+	}
+
+	JFFS2_DBG_FRAGTREE2("the node is \"%s\" now\n", ref_flag == REF_NORMAL ? "REF_NORMAL" : "REF_PRISTINE");
+
+	/* Space accounting was adjusted at check_node_data() */
+	spin_lock(&c->erase_completion_lock);
+	fn->raw->flash_offset = ref_offset(fn->raw) | ref_flag;
+	spin_unlock(&c->erase_completion_lock);
+
+	return 0;
+}
 
 void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state)
 {

Index: nodelist.h
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/nodelist.h,v
retrieving revision 1.136
retrieving revision 1.137
diff -u -r1.136 -r1.137
--- nodelist.h	31 Jul 2005 08:20:44 -0000	1.136
+++ nodelist.h	1 Aug 2005 12:05:19 -0000	1.137
@@ -61,6 +61,9 @@
 #error wibble
 #endif
 
+/* The minimal node header size */
+#define JFFS2_MIN_NODE_HEADER sizeof(struct jffs2_raw_dirent)
+
 /*
   This is all we need to keep in-core for each raw node during normal
   operation. As and when we do read_inode on a particular inode, we can
@@ -148,6 +151,9 @@
 	struct rb_node rb;
 	struct jffs2_full_dnode *fn;
 	uint32_t version;
+	uint32_t data_crc;
+	uint32_t partial_crc;
+	uint32_t csize;
 };       
 
 struct jffs2_full_dirent
@@ -311,6 +317,7 @@
 void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
 void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
+int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
 
 /* nodemgmt.c */
 int jffs2_thread_should_wake(struct jffs2_sb_info *c);

Index: readinode.c
===================================================================
RCS file: /home/cvs/mtd/fs/jffs2/readinode.c,v
retrieving revision 1.134
retrieving revision 1.135
diff -u -r1.134 -r1.135
--- readinode.c	31 Jul 2005 08:20:44 -0000	1.134
+++ readinode.c	1 Aug 2005 12:05:19 -0000	1.135
@@ -21,8 +21,8 @@
 #include <linux/compiler.h>
 #include "nodelist.h"
 
-/* 
- * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in 
+/*
+ * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
  * order of increasing version.
  */
 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
@@ -38,11 +38,11 @@
 		/* There may actually be a collision here, but it doesn't
 		   actually matter. As long as the two nodes with the same
 		   version are together, it's all fine. */
-		if (tn->version < this->version)
+		if (tn->version > this->version)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
-        }
+	}
 
 	rb_link_node(&tn->rb, parent, p);
 	rb_insert_color(&tn->rb, list);
@@ -111,14 +111,9 @@
  * 	    1 if the node should be marked obsolete;
  * 	    negative error code on failure.
  */
-static inline int
-read_direntry(struct jffs2_sb_info *c,
-	      struct jffs2_raw_node_ref *ref,
-	      struct jffs2_raw_dirent *rd,
-	      uint32_t read,
-	      struct jffs2_full_dirent **fdp,
-	      int32_t *latest_mctime,
-	      uint32_t *mctime_ver)
+static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
+				struct jffs2_raw_dirent *rd, uint32_t read, struct jffs2_full_dirent **fdp,
+				uint32_t *latest_mctime, uint32_t *mctime_ver)
 {
 	struct jffs2_full_dirent *fd;
 	
@@ -196,30 +191,35 @@
  * 	    1 if the node should be marked obsolete;
  * 	    negative error code on failure.
  */
-static inline int
-read_dnode(struct jffs2_sb_info *c,
-	   struct jffs2_raw_node_ref *ref,
-	   struct jffs2_raw_inode *rd,
-	   uint32_t read,
-	   struct rb_root *tnp,
-	   int32_t *latest_mctime,
-	   uint32_t *mctime_ver)
+static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
+			     struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
+			     uint32_t *latest_mctime, uint32_t *mctime_ver)
 {
-	struct jffs2_eraseblock *jeb;
 	struct jffs2_tmp_dnode_info *tn;
+	uint32_t len, csize;
+	int ret = 1;
 	
 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
 	BUG_ON(ref_obsolete(ref));
 
+	tn = jffs2_alloc_tmp_dnode_info();
+	if (!tn) {
+		JFFS2_ERROR("failed to allocate tn (%d bytes).\n", sizeof(*tn));
+		return -ENOMEM;
+	}
+
+	tn->partial_crc = 0;
+	csize = je32_to_cpu(rd->csize);
+	
 	/* If we've never checked the CRCs on this node, check them now */
 	if (ref_flags(ref) == REF_UNCHECKED) {
-		uint32_t crc, len;
+		uint32_t crc;
 
 		crc = crc32(0, rd, sizeof(*rd) - 8);
 		if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
 			JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
 					ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
-			return 1;
+			goto free_out;
 		}
 		
 		/* Sanity checks */
@@ -227,107 +227,102 @@
 		    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
 				JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
 				jffs2_dbg_dump_node(c, ref_offset(ref));
-			return 1;
-		}
-
-		if (rd->compr != JFFS2_COMPR_ZERO && je32_to_cpu(rd->csize)) {
-			unsigned char *buf = NULL;
-			uint32_t pointed = 0;
-			int err;
-#ifndef __ECOS
-			if (c->mtd->point) {
-				err = c->mtd->point (c->mtd, ref_offset(ref) + sizeof(*rd), je32_to_cpu(rd->csize),
-						     &read, &buf);
-				if (unlikely(read < je32_to_cpu(rd->csize)) && likely(!err)) {
-					JFFS2_ERROR("MTD point returned len too short: 0x%zx\n", read);
-					c->mtd->unpoint(c->mtd, buf, ref_offset(ref) + sizeof(*rd),
-							je32_to_cpu(rd->csize));
-				} else if (unlikely(err)){
-					JFFS2_ERROR("MTD point failed %d\n", err);
-				} else
-					pointed = 1; /* succefully pointed to device */
-			}
-#endif					
-			if(!pointed){
-				buf = kmalloc(je32_to_cpu(rd->csize), GFP_KERNEL);
-				if (!buf)
-					return -ENOMEM;
-				
-				err = jffs2_flash_read(c, ref_offset(ref) + sizeof(*rd), je32_to_cpu(rd->csize),
-							&read, buf);
-				if (unlikely(read != je32_to_cpu(rd->csize)) && likely(!err))
-					err = -EIO;
-				if (err) {
-					kfree(buf);
-					return err;
-				}
-			}
-			crc = crc32(0, buf, je32_to_cpu(rd->csize));
-			if(!pointed)
-				kfree(buf);
-#ifndef __ECOS
-			else
-				c->mtd->unpoint(c->mtd, buf, ref_offset(ref) + sizeof(*rd), je32_to_cpu(rd->csize));
-#endif
-
-			if (crc != je32_to_cpu(rd->data_crc)) {
-				JFFS2_NOTICE("data CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
-					ref_offset(ref), je32_to_cpu(rd->data_crc), crc);
-				return 1;
-			}
-			
+			goto free_out;
 		}
 
-		/* Mark the node as having been checked and fix the accounting accordingly */
-		jeb = &c->blocks[ref->flash_offset / c->sector_size];
-		len = ref_totlen(c, jeb, ref);
-
-		spin_lock(&c->erase_completion_lock);
-		jeb->used_size += len;
-		jeb->unchecked_size -= len;
-		c->used_size += len;
-		c->unchecked_size -= len;
-
-		/* If node covers at least a whole page, or if it starts at the 
-		   beginning of a page and runs to the end of the file, or if 
-		   it's a hole node, mark it REF_PRISTINE, else REF_NORMAL. 
+		if (jffs2_is_writebuffered(c) && csize != 0) {
+			/* At this point we are supposed to check the data CRC
+			 * of our unchecked node. But thus far, we do not
+			 * know whether the node is valid or obsolete. To
+			 * figure this out, we need to walk all the nodes of
+			 * the inode and build the inode fragtree. We don't
+			 * want to spend time checking data of nodes which may
+			 * later be found to be obsolete. So we put off the full
+			 * data CRC checking until we have read all the inode
+			 * nodes and have started building the fragtree.
+			 *
+			 * The fragtree is being built starting with nodes
+			 * having the highest version number, so we'll be able
+			 * to detect whether a node is valid (i.e., it is not
+			 * overlapped by a node with higher version) or not.
+			 * And we'll be able to check only those nodes, which
+			 * are not obsolete.
+			 *
+			 * Of course, this optimization only makes sense in case
+			 * of NAND flashes (or other flashes with
+			 * !jffs2_can_mark_obsolete()), since on NOR flashes
+			 * nodes are marked obsolete physically.
+			 *
+			 * Since NAND flashes (or other flashes with
+			 * jffs2_is_writebuffered(c)) are anyway read by
+			 * fractions of c->wbuf_pagesize, and we have just read
+			 * the node header, it is likely that the starting part
+			 * of the node data is also read when we read the
+			 * header. So we don't mind checking the CRC of the
+			 * starting part of the data of the node now, and check
+			 * the second part later (in jffs2_check_node_data()).
+			 * Of course, we will not need to re-read and re-check
+			 * the NAND page which we have just read. This is why we
+			 * read the whole NAND page at jffs2_get_inode_nodes(),
+			 * while we needed only the node header.
+			 */
+			unsigned char *buf;
+
+			/* 'buf' will point to the start of data */
+			buf = (unsigned char *)rd + sizeof(*rd);
+			/* len will be the read data length */
+			len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
+			
+			if (len)
+				tn->partial_crc = crc = crc32(0, buf, len);
+
+			/* If we actually calculated the whole data CRC
+			 * and it is wrong, drop the node. */
+			if (unlikely(tn->partial_crc
+			 		!= je32_to_cpu(rd->data_crc)) &&
+				len == csize)
+				goto free_out;
 
-		   If it's actually overlapped, it'll get made NORMAL (or OBSOLETE) 
-		   when the overlapping node(s) get added to the tree anyway. 
-		*/
-		if ((je32_to_cpu(rd->dsize) >= PAGE_CACHE_SIZE) ||
-		    ( ((je32_to_cpu(rd->offset) & (PAGE_CACHE_SIZE-1))==0) &&
-		      (je32_to_cpu(rd->dsize) + je32_to_cpu(rd->offset) == je32_to_cpu(rd->isize)))) {
-			JFFS2_DBG_READINODE("marking node at %#08x REF_PRISTINE\n", ref_offset(ref));
-			ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
-		} else {
-			JFFS2_DBG_READINODE("marking node at %#08x REF_NORMAL\n", ref_offset(ref));
+		} else if (csize == 0) {
+			/*
+			 * We checked the header CRC. If the node has no data, adjust
+			 * the space accounting now. For other nodes this will be done
+			 * later either when the node is marked obsolete or when its
+			 * data is checked.
+			 */
+			struct jffs2_eraseblock *jeb;
+
+			JFFS2_DBG_READINODE("the node has no data.\n");
+			jeb = &c->blocks[ref->flash_offset / c->sector_size];
+			len = ref_totlen(c, jeb, ref);
+
+			spin_lock(&c->erase_completion_lock);
+			jeb->used_size += len;
+			jeb->unchecked_size -= len;
+			c->used_size += len;
+			c->unchecked_size -= len;
 			ref->flash_offset = ref_offset(ref) | REF_NORMAL;
+			spin_unlock(&c->erase_completion_lock);
 		}
-		spin_unlock(&c->erase_completion_lock);
-	}
-
-	tn = jffs2_alloc_tmp_dnode_info();
-	if (!tn) {
-		JFFS2_ERROR("alloc tn failed\n");
-		return -ENOMEM;
 	}
 
 	tn->fn = jffs2_alloc_full_dnode();
 	if (!tn->fn) {
 		JFFS2_ERROR("alloc fn failed\n");
-		jffs2_free_tmp_dnode_info(tn);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto free_out;
 	}
 	
 	tn->version = je32_to_cpu(rd->version);
 	tn->fn->ofs = je32_to_cpu(rd->offset);
+	tn->data_crc = je32_to_cpu(rd->data_crc);
+	tn->csize = csize;
 	tn->fn->raw = ref;
 	
 	/* There was a bug where we wrote hole nodes out with
 	   csize/dsize swapped. Deal with it */
-	if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && je32_to_cpu(rd->csize))
-		tn->fn->size = je32_to_cpu(rd->csize);
+	if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
+		tn->fn->size = csize;
 	else // normal case...
 		tn->fn->size = je32_to_cpu(rd->dsize);
 
@@ -337,6 +332,10 @@
 	jffs2_add_tn_to_tree(tn, tnp);
 
 	return 0;
+
+free_out:
+	jffs2_free_tmp_dnode_info(tn);
+	return ret;
 }
 
 /*
@@ -347,11 +346,7 @@
  * 	    1 if the node should be marked obsolete;
  * 	    negative error code on failure.
  */
-static inline int
-read_unknown(struct jffs2_sb_info *c,
-	     struct jffs2_raw_node_ref *ref,
-	     struct jffs2_unknown_node *un,
-	     uint32_t read)
+static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
 {
 	/* We don't mark unknown nodes as REF_UNCHECKED */
 	BUG_ON(ref_flags(ref) == REF_UNCHECKED);
@@ -394,9 +389,62 @@
 	return 0;
 }
 
+/*
+ * Helper function for jffs2_get_inode_nodes().
+ * The function detects whether more data should be read and reads it if yes.
+ *
+ * Returns: 0 on success;
+ * 	    negative error code on failure.
+ */
+static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
+		     int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
+{
+	int right_len, err, len;
+	size_t retlen;
+	uint32_t offs;
+
+	if (jffs2_is_writebuffered(c)) {
+		right_len = c->wbuf_pagesize - (bufstart - buf);
+		if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
+			right_len += c->wbuf_pagesize;
+	} else
+		right_len = right_size;
+
+	if (*rdlen == right_len)
+		return 0;
+
+	/* We need to read more data */
+	offs = ref_offset(ref) + *rdlen;
+	if (jffs2_is_writebuffered(c)) {
+		bufstart = buf + c->wbuf_pagesize;
+		len = c->wbuf_pagesize;
+	} else {
+		bufstart = buf + *rdlen;
+		len = right_size - *rdlen;
+	}
+	
+	JFFS2_DBG_READINODE("read more %d bytes.", len);
+
+	err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
+	if (err) {
+		JFFS2_ERROR("can not read %d bytes from 0x%08x, "
+			"error code: %d.\n", len, offs, err);
+		return err;
+	}
+	
+	if (retlen < len) {
+		JFFS2_ERROR("short read at %#08x: %d instead of %d.\n",
+				offs, retlen, len);
+		return -EIO;
+	}
+
+	*rdlen = right_len;
+
+	return 0;
+}
+
 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
    with this ino, returning the former in order of version */
-
 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 				 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
 				 uint32_t *highest_version, uint32_t *latest_mctime,
@@ -405,22 +453,47 @@
 	struct jffs2_raw_node_ref *ref, *valid_ref;
 	struct rb_root ret_tn = RB_ROOT;
 	struct jffs2_full_dirent *ret_fd = NULL;
-	union jffs2_node_union node;
+	unsigned char *buf = NULL;
+	union jffs2_node_union *node;
 	size_t retlen;
-	int err;
+	int len, err;
 
 	*mctime_ver = 0;
 	
 	JFFS2_DBG_READINODE("ino #%u\n", f->inocache->ino);
 
-	spin_lock(&c->erase_completion_lock);
+	if (jffs2_is_writebuffered(c)) {
+		/*
+		 * If we have the write buffer, we assume the minimal I/O unit
+		 * is c->wbuf_pagesize. We implement some optimizations in
+		 * this case, and we need a temporary buffer of size =
+		 * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
+		 * Basically, we want to read not only the node header, but the
+		 * whole wbuf (NAND page in case of NAND) or 2, if the node
+		 * header overlaps the border between the 2 wbufs.
+		 */
+		len = 2*c->wbuf_pagesize;
+	} else {
+		/*
+		 * When there is no write buffer, the size of the temporary
+		 * buffer is the size of the largest node header.
+		 */
+		len = sizeof(union jffs2_node_union);
+	}
 
+	/* FIXME: in case of NOR and available ->point() this
+	 * needs to be fixed. */
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+			
+	spin_lock(&c->erase_completion_lock);
 	valid_ref = jffs2_first_valid_node(f->inocache->nodes);
-
-	if (!valid_ref && (f->inocache->ino != 1))
-		JFFS2_WARNING("no valid nodes for ino #%u\n", f->inocache->ino);
-
+	if (!valid_ref && f->inocache->ino != 1)
+		JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
 	while (valid_ref) {
+		unsigned char *bufstart;
+
 		/* We can hold a pointer to a non-obsolete node without the spinlock,
 		   but _obsolete_ nodes may disappear at any time, if the block
 		   they're in gets erased. So if we mark 'ref' obsolete while we're
@@ -433,70 +506,100 @@
 
 		cond_resched();
 
+		/*
+		 * At this point we don't know the type of the node we're going
+		 * to read, so we do not know the size of its header. In order
+		 * to minimize the amount of flash IO we assume the node has
+		 * size = JFFS2_MIN_NODE_HEADER.
+		 */
+		if (jffs2_is_writebuffered(c)) {
+			/* 
+			 * We treat 'buf' as 2 adjacent wbufs. We want to
+			 * adjust bufstart so that it points to the
+			 * beginning of the node within this wbuf.
+			 */
+			bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
+			/* We will read either one wbuf or 2 wbufs. */
+			len = c->wbuf_pagesize - (bufstart - buf);
+			if (JFFS2_MIN_NODE_HEADER +
+				(int)(bufstart - buf) > c->wbuf_pagesize) {
+				/* The header spans the border of the
+				 * first wbuf */
+				len += c->wbuf_pagesize;
+			}
+		} else {
+			bufstart = buf;
+			len = JFFS2_MIN_NODE_HEADER;
+		}
+
+		JFFS2_DBG_READINODE("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
+
 		/* FIXME: point() */
-		err = jffs2_flash_read(c, (ref_offset(ref)), 
-				       min_t(uint32_t, ref_totlen(c, NULL, ref), sizeof(node)),
-				       &retlen, (void *)&node);
+		err = jffs2_flash_read(c, ref_offset(ref), len,
+				       &retlen, bufstart);
 		if (err) {
-			JFFS2_ERROR("error %d reading node at 0x%08x in get_inode_nodes()\n", err, ref_offset(ref));
+			JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
+			goto free_out;
+		}
+		
+		if (retlen < len) {
+			JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ref_offset(ref), retlen, len);
+			err = -EIO;
 			goto free_out;
 		}
+		
+		node = (union jffs2_node_union *)bufstart;
 			
-		switch (je16_to_cpu(node.u.nodetype)) {
+		switch (je16_to_cpu(node->u.nodetype)) {
 			
 		case JFFS2_NODETYPE_DIRENT:
-			JFFS2_DBG_READINODE("node at %08x (%d) is a dirent node\n", ref_offset(ref), ref_flags(ref));
-			
-			if (retlen < sizeof(node.d)) {
-				JFFS2_ERROR("short read dirent at %#08x\n", ref_offset(ref));
-				err = -EIO;
-				goto free_out;
-			}
 
-			err = read_direntry(c, ref, &node.d, retlen, &ret_fd, latest_mctime, mctime_ver);
+			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
+				err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
+				if (unlikely(err))
+					goto free_out;
+			}
+			
+			err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
 			if (err == 1) {
 				jffs2_mark_node_obsolete(c, ref);
 				break;
 			} else if (unlikely(err))
 				goto free_out;
 			
-			if (je32_to_cpu(node.d.version) > *highest_version)
-				*highest_version = je32_to_cpu(node.d.version);
+			if (je32_to_cpu(node->d.version) > *highest_version)
+				*highest_version = je32_to_cpu(node->d.version);
 
 			break;
 
 		case JFFS2_NODETYPE_INODE:
-			JFFS2_DBG_READINODE("node at %08x (%d) is a data node\n", ref_offset(ref), ref_flags(ref));
 			
-			if (retlen < sizeof(node.i)) {
-				JFFS2_ERROR("short read dnode at %#08x\n", ref_offset(ref));
-				err = -EIO;
-				goto free_out;
+			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
+				err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
+				if (unlikely(err))
+					goto free_out;
 			}
 
-			err = read_dnode(c, ref, &node.i, retlen, &ret_tn, latest_mctime, mctime_ver);
+			err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
 			if (err == 1) {
 				jffs2_mark_node_obsolete(c, ref);
 				break;
 			} else if (unlikely(err))
 				goto free_out;
 
-			if (je32_to_cpu(node.i.version) > *highest_version)
-				*highest_version = je32_to_cpu(node.i.version);
+			if (je32_to_cpu(node->i.version) > *highest_version)
+				*highest_version = je32_to_cpu(node->i.version);
 			
-			JFFS2_DBG_READINODE("version %d, highest_version now %d\n",
-					je32_to_cpu(node.i.version), *highest_version);
-
 			break;
 
 		default:
-			/* Check we've managed to read at least the common node header */
-			if (retlen < sizeof(struct jffs2_unknown_node)) {
-				JFFS2_ERROR("short read unknown node at %#08x\n", ref_offset(ref));
-				return -EIO;
+			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
+				err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
+				if (unlikely(err))
+					goto free_out;
 			}
-
-			err = read_unknown(c, ref, &node.u, retlen);
+			
+			err = read_unknown(c, ref, &node->u);
 			if (err == 1) {
 				jffs2_mark_node_obsolete(c, ref);
 				break;
@@ -505,17 +608,21 @@
 
 		}
 		spin_lock(&c->erase_completion_lock);
-
 	}
+
 	spin_unlock(&c->erase_completion_lock);
 	*tnp = ret_tn;
 	*fdp = ret_fd;
+	kfree(buf);
 
+	JFFS2_DBG_READINODE("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
+			f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
 	return 0;
 
  free_out:
 	jffs2_free_tmp_dnode_info_list(&ret_tn);
 	jffs2_free_full_dirent_list(ret_fd);
+	kfree(buf);
 	return err;
 }
 
@@ -523,14 +630,13 @@
 					struct jffs2_inode_info *f,
 					struct jffs2_raw_inode *latest_node)
 {
-	struct jffs2_tmp_dnode_info *tn = NULL;
+	struct jffs2_tmp_dnode_info *tn;
 	struct rb_root tn_list;
 	struct rb_node *rb, *repl_rb;
 	struct jffs2_full_dirent *fd_list;
-	struct jffs2_full_dnode *fn = NULL;
+	struct jffs2_full_dnode *fn, *first_fn = NULL;
 	uint32_t crc;
 	uint32_t latest_mctime, mctime_ver;
-	uint32_t mdata_ver = 0;
 	size_t retlen;
 	int ret;
 
@@ -550,42 +656,33 @@
 	rb = rb_first(&tn_list);
 
 	while (rb) {
+		cond_resched();
 		tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
 		fn = tn->fn;
-
-		if (f->metadata) {
-			if (likely(tn->version >= mdata_ver)) {
-				JFFS2_DBG_READINODE("obsoleting old metadata at 0x%08x\n", ref_offset(f->metadata->raw));
-				jffs2_mark_node_obsolete(c, f->metadata->raw);
-				jffs2_free_full_dnode(f->metadata);
-				f->metadata = NULL;
-				
-				mdata_ver = 0;
-			} else {
-				/* This should never happen. */
-				JFFS2_ERROR("Er. New metadata at 0x%08x with ver %d is actually older than previous ver %d at 0x%08x\n",
-					  ref_offset(fn->raw), tn->version, mdata_ver, ref_offset(f->metadata->raw));
-				jffs2_mark_node_obsolete(c, fn->raw);
-				jffs2_free_full_dnode(fn);
-				/* Fill in latest_node from the metadata, not this one we're about to free... */
-				fn = f->metadata;
-				goto next_tn;
-			}
-		}
+		ret = 1;
+		JFFS2_DBG_READINODE("consider node ver %u, phys offset "
+			"%#08x(%d), range %u-%u.\n", tn->version,
+			ref_offset(fn->raw), ref_flags(fn->raw),
+			fn->ofs, fn->ofs + fn->size);
 
 		if (fn->size) {
-			jffs2_add_full_dnode_to_inode(c, f, fn);
-		} else {
-			/* Zero-sized node at end of version list. Just a metadata update */
-			JFFS2_DBG_READINODE("metadata @%08x: ver %d\n", ref_offset(fn->raw), tn->version);
+			ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
+			/* TODO: the error code isn't checked, check it */
+			jffs2_dbg_fragtree_paranoia_check_nolock(f);
+			BUG_ON(ret < 0);
+			if (!first_fn && ret == 0)
+				first_fn = fn;
+		} else if (!first_fn) {
+			first_fn = fn;
 			f->metadata = fn;
-			mdata_ver = tn->version;
-		}
-	next_tn:
+			ret = 0; /* Prevent freeing the metadata update node */
+		} else
+			jffs2_mark_node_obsolete(c, fn->raw);
+			
 		BUG_ON(rb->rb_left);
 		if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
 			/* We were then left-hand child of our parent. We need
-			   to move our own right-hand child into our place. */
+			 * to move our own right-hand child into our place. */
 			repl_rb = rb->rb_right;
 			if (repl_rb)
 				repl_rb->rb_parent = rb->rb_parent;
@@ -595,7 +692,7 @@
 		rb = rb_next(rb);
 
 		/* Remove the spent tn from the tree; don't bother rebalancing
-		   but put our right-hand child in our own place. */
+		 * but put our right-hand child in our own place. */
 		if (tn->rb.rb_parent) {
 			if (tn->rb.rb_parent->rb_left == &tn->rb)
 				tn->rb.rb_parent->rb_left = repl_rb;
@@ -606,10 +703,18 @@
 			tn->rb.rb_right->rb_parent = NULL;
 
 		jffs2_free_tmp_dnode_info(tn);
+		if (ret) {
+			JFFS2_DBG_READINODE("delete dnode %u-%u.\n",
+				fn->ofs, fn->ofs + fn->size);
+			jffs2_free_full_dnode(fn);
+		}
 	}
 	jffs2_dbg_fragtree_paranoia_check_nolock(f);
 
-	if (!fn) {
+	BUG_ON(first_fn && ref_obsolete(first_fn->raw));
+
+	fn = first_fn;
+	if (unlikely(!first_fn)) {
 		/* No data nodes for this inode. */
 		if (f->inocache->ino != 1) {
 			JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);





More information about the linux-mtd-cvs mailing list