afs/fs/cachefs vjournal.c,NONE,1.1 super.c,1.27,1.28 recycling.c,1.17,1.18 main.c,1.14,1.15 kcachefsd.c,1.9,1.10 journal.c,1.30,1.31 io.c,1.12,1.13 interface.c,1.5,1.6 index.c,1.16,1.17 dump-journal.c,1.8,1.9 cachetest-main.c,1.8,1.9 cachefs-layout.h,1.21,1.22 cachefs-int.h,1.30,1.31 block.c,1.4,1.5 aops.c,1.28,1.29 Makefile,1.15,1.16

dwh at infradead.org dwh at infradead.org
Fri May 23 14:59:25 BST 2003


Update of /home/cvs/afs/fs/cachefs
In directory phoenix.infradead.org:/tmp/cvs-serv27749/fs/cachefs

Modified Files:
	super.c recycling.c main.c kcachefsd.c journal.c io.c 
	interface.c index.c dump-journal.c cachetest-main.c 
	cachefs-layout.h cachefs-int.h block.c aops.c Makefile 
Added Files:
	vjournal.c 
Log Message:
added a validity journal to keep track of data cache blocks which are
allocated but not yet written


--- NEW FILE vjournal.c ---
/* vjournal.c: validity journal management
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells at redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "cachefs-int.h"

/*****************************************************************************/
/*
 * allocate an entry in the block validity tracking journal
 * - returned attached to trans->vjentry
 */
int cachefs_vj_alloc(struct cachefs_transaction *trans, struct cachefs_inode *inode)
{
	DECLARE_WAITQUEUE(myself,current);

	struct cachefs_vj_entry *vjentry;
	struct cachefs_super *super;
	cachefs_blockix_t bix;
	int slot, ret;

	kenter("");

	super = trans->super;

	vjentry = kmalloc(sizeof(*vjentry),GFP_KERNEL);
	if (!vjentry) {
		kleave(" = -ENOMEM");
		return -ENOMEM;
	}

	memset(vjentry,0,sizeof(vjentry));
	INIT_LIST_HEAD(&vjentry->link);

	vjentry->ino = inode->vfs_inode.i_ino;

	/* now allocate a slot when one becomes available */
	spin_lock(&super->vjnl_lock);

	if (super->vjnl_count==0) {
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&super->vjnl_alloc_wq,&myself);

		while (super->vjnl_count!=0 && !signal_pending(current)) {
			spin_unlock(&super->vjnl_lock);
			schedule();
			spin_lock(&super->vjnl_lock);
			set_current_state(TASK_INTERRUPTIBLE);
		}

		set_current_state(TASK_RUNNING);
		remove_wait_queue(&super->vjnl_alloc_wq,&myself);

		ret = -EINTR;
		if (signal_pending(current))
			goto error_free;
	}

	slot = find_first_zero_bit(super->vjnl_map,CACHEFS_ONDISC_VJNL_ENTS);
	if (slot<0 || slot>=CACHEFS_ONDISC_VJNL_ENTS)
		BUG();

	set_bit(slot,super->vjnl_map);
	super->vjnl_count--;

	spin_unlock(&super->vjnl_lock);

	/* got a slot - now find the block and page holding it */
	kdebug("VJ slot %d",slot);

	vjentry->vslot	= slot;
	vjentry->ventry	= slot % CACHEFS_ONDISC_VJNL_ENTPERPAGE;

	bix = slot / CACHEFS_ONDISC_VJNL_ENTPERPAGE;
	bix += super->layout->bix_vjournal;

	ret = cachefs_block_read(super,NULL,bix,0,&vjentry->vblock,&vjentry->vpage);
	if (ret<0)
		goto error_clearbit;

	trans->vjentry = vjentry;

	cachefs_trans_affects_block(trans,vjentry->vblock,vjentry->ventry,
				    sizeof(struct cachefs_ondisc_validity_journal));

	kleave(" = 0");
	return 0;

 error_clearbit:
	spin_lock(&super->vjnl_lock);
	clear_bit(slot,super->vjnl_map);
	super->vjnl_count++;

 error_free:
	spin_unlock(&super->vjnl_lock);
	kfree(vjentry);
	kleave(" = %d",ret);
	return ret;

} /* end cachefs_vj_alloc() */

/*****************************************************************************/
/*
 * release a v-journal entry
 * - clear the allocation map bit and wake up anyone trying to allocate
 */
void cachefs_vj_release(struct cachefs_super *super, struct cachefs_vj_entry *vjentry)
{
	kenter("");

	/* drop the refs held on the journal page and block backing this
	 * entry, then destroy the in-memory record */
	cachefs_put_page(vjentry->vpage);
	cachefs_block_put(vjentry->vblock);

	/* return the slot to the allocation map */
	spin_lock(&super->vjnl_lock);
	clear_bit(vjentry->vslot,super->vjnl_map);
	super->vjnl_count++;
	spin_unlock(&super->vjnl_lock);

	/* give any blocked allocator a chance at the freed slot */
	wake_up(&super->vjnl_alloc_wq);

	kfree(vjentry);

	kleave("");
} /* end cachefs_vj_release() */

/*****************************************************************************/
/*
 * clear a v-journal entry due to the target block having been written
 */
void cachefs_vj_write_complete(struct cachefs_vj_entry *vjentry)
{
	kenter("");

	/* NOTE(review): placeholder - no clearing is performed yet; the body
	 * only logs entry/exit.  Presumably this should erase the validity
	 * journal entry once the covered data block has reached disc - TODO
	 * confirm intended behaviour before relying on this function */
	kleave("");
} /* end cachefs_vj_write_complete() */

/*****************************************************************************/
/*
 * queue an invalid block for detachment and recycling
 */
void cachefs_vj_cancel(struct cachefs_vj_entry *vjentry)
{
	struct cachefs_super *super = vjentry->vblock->super;

	kenter("{vs=%u pg={%u}+%u up={%u}+%u",
	       vjentry->vslot,vjentry->ino,vjentry->pgnum,vjentry->upblock,vjentry->upentry);

	/* shift the entry onto the unallocation queue under the v-journal
	 * lock */
	spin_lock(&super->vjnl_lock);
	list_move_tail(&vjentry->link,&super->vjnl_unallocq);
	spin_unlock(&super->vjnl_lock);

	/* poke the daemon to come and process the queue */
	wake_up(&super->dmn_sleepq);

	kleave("");
} /* end cachefs_vj_cancel() */

Index: super.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/super.c,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- super.c	29 Apr 2003 15:24:06 -0000	1.27
+++ super.c	23 May 2003 12:59:21 -0000	1.28
@@ -264,6 +264,12 @@
 	if (!super->rcm_atm_list)
 		goto error;
 
+	super->vjnl_map = (unsigned long*) get_zeroed_page(GFP_KERNEL);
+	if (!super->vjnl_map)
+		goto error;
+
+	super->vjnl_count = CACHEFS_ONDISC_VJNL_ENTS;
+
 	/* fill in the superblock */
 	sb->s_magic		= CACHEFS_FS_MAGIC;
 	sb->s_op		= &cachefs_super_ops;
@@ -305,6 +311,10 @@
 	super->batch_timer.function = cachefs_trans_batch_timer;
 	super->batch_timer.data = (unsigned long) super;
 
+	spin_lock_init(&super->vjnl_lock);
+	init_waitqueue_head(&super->vjnl_alloc_wq);
+	INIT_LIST_HEAD(&super->vjnl_unallocq);
+
 	init_MUTEX(&super->alloc_sem);
 	init_waitqueue_head(&super->alloc_wq);
 
@@ -420,6 +430,7 @@
 		if (super->recycle_node)	page_cache_release(super->recycle_node);
 		if (super->rcm_atm_list)	free_page((unsigned long)super->rcm_atm_list);
 		if (super->rcm_imm_buf)		free_page((unsigned long)super->rcm_imm_buf);
+		if (super->vjnl_map)		free_page((unsigned long)super->vjnl_map);
 
 		kcachefs_jnld_remove_super(super);
 	}
@@ -493,7 +504,8 @@
 	qty = super->layout->ujnl_rsize * CACHEFS_ONDISC_UJNL_NUMENTS / super->layout->bsize;
 
 	super->layout->bix_ujournal	= 1 + ndirect;
-	super->layout->bix_wbjournal	= super->layout->bix_ujournal  + qty;
+	super->layout->bix_vjournal	= super->layout->bix_ujournal  + qty;
+	super->layout->bix_wbjournal	= super->layout->bix_vjournal  + CACHEFS_ONDISC_VJNL_SIZE;
 	super->layout->bix_cache	= super->layout->bix_wbjournal + CACHEFS_ONDISC_WBJNL_SIZE;
 	super->layout->bix_unready	= super->layout->bix_cache;
 	super->layout->bix_end		= nblocks;
@@ -506,6 +518,7 @@
 	       sizeof(struct cachefs_ondisc_update_journal),
 	       super->layout->ujnl_rsize);
 
+	printk("CacheFS: %08x validity journal\n",	super->layout->bix_vjournal);
 	printk("CacheFS: %08x writeback journal\n",	super->layout->bix_wbjournal);
 	printk("CacheFS: %08x data cache\n",		super->layout->bix_cache);
 	printk("CacheFS: %08x end\n",			super->layout->bix_end);
@@ -694,6 +707,8 @@
 {
 	struct cachefs_super *super = sb->s_fs_info;
 
+	DECLARE_WAITQUEUE(myself,current);
+
 	printk("\n\n");
 	kenter("{%p}",super);
 
@@ -701,6 +716,19 @@
 
 	cachefs_withdraw_cache(super);
 
+	if (!list_empty(&super->vjnl_unallocq)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&super->vjnl_alloc_wq,&myself);
+
+		while (!list_empty(&super->vjnl_unallocq)) {
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
+
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&super->vjnl_alloc_wq,&myself);
+	}
+
 	cachefs_trans_sync(super,1);
 
 	super->dmn_die = 1;
@@ -709,36 +737,25 @@
 
 	del_timer_sync(&super->batch_timer);
 
-	if (super->alloc_node) {
-		dbgpgfree(super->alloc_node);
-		page_cache_release(super->alloc_node);
-	}
-
-	if (super->alloc_block)
-		cachefs_block_put(super,super->alloc_block);
-
-	if (super->alloc_next) {
-		dbgpgfree(super->alloc_next);
-		page_cache_release(super->alloc_next);
-	}
-
-	if (super->alloc_nxblock)
-		cachefs_block_put(super,super->alloc_nxblock);
-
-	if (super->recycle_node) {
-		dbgpgfree(super->recycle_node);
-		page_cache_release(super->recycle_node);
-	}
-
-	if (super->recycle_block)
-		cachefs_block_put(super,super->recycle_block);
-
-	if (super->rcm_atm_list)	free_page((unsigned long)super->rcm_atm_list);
-	if (super->rcm_imm_buf)		free_page((unsigned long)super->rcm_imm_buf);
+	dbgpgfree(super->alloc_node);
+	cachefs_put_page(super->alloc_node);
+	cachefs_block_put(super->alloc_block);
+
+	dbgpgfree(super->alloc_next);
+	cachefs_put_page(super->alloc_next);
+	cachefs_block_put(super->alloc_nxblock);
+
+	dbgpgfree(super->recycle_node);
+	cachefs_put_page(super->recycle_node);
+	cachefs_block_put(super->recycle_block);
+
+	free_page((unsigned long)super->rcm_atm_list);
+	free_page((unsigned long)super->rcm_imm_buf);
+	free_page((unsigned long)super->vjnl_map);
 
 	kcachefs_jnld_remove_super(super);
 
-	page_cache_release(virt_to_page(super->layout));
+	cachefs_put_page(virt_to_page(super->layout));
 	cachefs_iput(super->istorage);
 	iput(super->imisc);
 

Index: recycling.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/recycling.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- recycling.c	24 Apr 2003 12:02:33 -0000	1.17
+++ recycling.c	23 May 2003 12:59:21 -0000	1.18
@@ -94,7 +94,7 @@
 	node = (struct cachefs_ondisc_free_node*) kmap(page);
 	node->next = super->recycle_cur;
 	node->count = super->recycle_cur_n;
-	kunmap(node);
+	kunmap(page);
 
 	super->recycle_cur	= bix;
 	super->recycle_room	= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE;
@@ -107,7 +107,7 @@
 	/* go do it */
 	cachefs_trans_commit(trans);
 
-	if (block) cachefs_block_put(super,block);
+	if (block) cachefs_block_put(block);
 	if (page) {
 		dbgpgfree(page);
 		page_cache_release(page);
@@ -119,7 +119,7 @@
  error_rel_trans:
 	cachefs_trans_put(trans);
  error_rel_block:
-	cachefs_block_put(super,block);
+	cachefs_block_put(block);
 	_leave(" = %d",ret);
 	return ret;
 } /* end cachefs_recycle_begin_new_node() */
@@ -192,13 +192,14 @@
 		*pbix = super->layout->bix_unready + loop;
 	}
 
-	kunmap(node);
+	kunmap(super->recycle_node);
 
 	/* send to disc */
 	super->layout->bix_unready += qty;
 	super->recycle_room -= qty;
 
-	if (super->recycle_room==0) super->recycle_cur_n++;
+	if (super->recycle_room==0)
+		super->recycle_cur_n++;
 
 	cachefs_trans_commit(trans);
 
@@ -280,7 +281,7 @@
 		/* mark the journal to begin the transfer */
 		trans = cachefs_trans_alloc(super,GFP_KERNEL);
 		if (!trans) {
-			cachefs_block_put(super,block);
+			cachefs_block_put(block);
 			page_cache_release(page);
 			_leave(" [ENOMEM]");
 			return;
@@ -297,7 +298,7 @@
 			printk("CacheFS: failed to mark ujnl: %d\n",ret);
 			dbgpgfree(page);
 			page_cache_release(page);
-			cachefs_block_put(super,block);
+			cachefs_block_put(block);
 			cachefs_trans_put(trans);
 			_leave(" [error %d]",ret);
 			return;
@@ -417,7 +418,8 @@
 
 	super->recycle_room -= count;
 	super->recycle_cur_n += count;
-	if (super->recycle_room==0) super->recycle_cur_n++;
+	if (super->recycle_room==0)
+		super->recycle_cur_n++;
 
 	/* transfer from the jentry to the recycling block */
 	cachefs_block_modify(super,super->recycle_block,&super->recycle_node);
@@ -426,7 +428,7 @@
 
 	memcpy(&node->leaves[dst],trans->jentry->rcyptrs,count*sizeof(cachefs_blockix_t));
 
-	kunmap(node);
+	kunmap(super->recycle_node);
 
 	super->rcm_ptrnext = src;
 
@@ -509,7 +511,8 @@
 		trans->jentry->upblock	= storage->freelink;
 		kunmap(iinode->storage);
 
-		cachefs_trans_affects_page(trans,ixpage,offset,iinode->index_esize);
+		cachefs_trans_affects_page(trans,__cachefs_page_get_private(ixpage),offset,
+					   iinode->index_esize);
 		cachefs_trans_affects_inode(trans,iinode);
 	}
 
@@ -558,9 +561,9 @@
 	trans = NULL;
 
  error:
-	if (trans)	cachefs_trans_put(trans);
-	if (ixpage)	put_page(ixpage);
-	if (iinode)	cachefs_iput(iinode);
+	cachefs_trans_put(trans);
+	cachefs_put_page(ixpage);
+	cachefs_iput(iinode);
 
 	kleave(" = %d",ret);
 	return ret;
@@ -665,7 +668,7 @@
 				if (bix)
 					break;
 			}
-			kunmap(pbix);
+			kunmap(dpage);
 
 			indirect = loop + 3;
 			if (bix) {
@@ -812,4 +815,130 @@
 		set_bit(CACHEFS_SUPER_DO_RECLAIM,&super->flags);
 	kleave("");
 	return;
-} /* end cachefs_recycle_immediate_reclaim() */
+} /* end cachefs_recycle_reclaim() */
+
+/*****************************************************************************/
+/*
+ * unallocate and recycle a single data storage block that's marked as invalid
+ * in the validity journal
+ */
+void cachefs_recycle_unallocate_data_block(struct cachefs_super *super)
+{
+	struct cachefs_ondisc_free_node *node;
+	struct cachefs_transaction *trans;
+	struct cachefs_vj_entry *vjentry;
+	struct cachefs_block *rcyblock = NULL, *upblock = NULL;
+	struct page *rcypage = NULL, *uppage = NULL;
+	int ret;
+
+	kenter("");
+
+	BUG_ON(list_empty(&super->vjnl_unallocq));
+
+	spin_lock(&super->vjnl_lock);
+	vjentry = list_entry(super->vjnl_unallocq.next,struct cachefs_vj_entry,link);
+	list_del_init(&vjentry->link);
+	spin_unlock(&super->vjnl_lock);
+
+	ret = -ENOMEM;
+	trans = cachefs_trans_alloc(super,GFP_KERNEL);
+	if (!trans)
+		goto error;
+
+	trans->jentry->mark   	= CACHEFS_ONDISC_UJNL_DATA_UNALLOCING;
+	trans->jentry->ino	= vjentry->ino;
+	trans->jentry->auxmark	= vjentry->vslot;
+	trans->jentry->block	= vjentry->bix;
+	trans->jentry->upblock	= vjentry->upblock;
+	trans->jentry->upentry	= vjentry->upentry / sizeof(cachefs_blockix_t);
+	trans->jentry->auxblock	= super->recycle_cur;
+	trans->jentry->auxentry	= ~0;
+
+	ret = cachefs_block_read(super,NULL,vjentry->upblock,1,&upblock,&uppage);
+	if (ret<0)
+		goto error_free;
+
+	cachefs_trans_affects_block(trans,upblock,vjentry->upentry,sizeof(cachefs_blockix_t));
+
+	cachefs_trans_affects_block(trans,vjentry->vblock,vjentry->ventry,
+				    sizeof(struct cachefs_ondisc_validity_journal));
+
+	if (super->recycle_room==0) {
+		/* we have to generate a new recycling node */
+		ret = cachefs_block_read(super,NULL,vjentry->bix,1,&rcyblock,&rcypage);
+		if (ret<0)
+			goto error_free;
+
+		cachefs_trans_affects_block(trans,rcyblock,0,PAGE_SIZE);
+	}
+	else {
+		/* we can add into an existing recycling node */
+		cachefs_trans_affects_block(trans,super->recycle_block,0,PAGE_SIZE);
+
+		trans->jentry->auxentry	=
+			CACHEFS_ONDISC_LEAVES_PER_FREE_NODE - super->recycle_room;
+	}
+
+	/* mark the journal and then make modifications */
+	ret = cachefs_trans_mark(trans);
+	if (ret<0)
+		goto error_free;
+
+	cachefs_block_modify(super,vjentry->vblock,&vjentry->vpage);
+	memset(kmap(vjentry->vpage) + vjentry->ventry, 0,
+	       sizeof(struct cachefs_ondisc_validity_journal));
+	kunmap(vjentry->vpage);
+
+	cachefs_block_modify(super,upblock,&uppage);
+	memset(kmap(uppage) + vjentry->upentry, 0, sizeof(cachefs_blockix_t));
+	kunmap(uppage);
+
+	if (trans->jentry->auxentry==UINT_MAX) {
+		/* new recycling node */
+		node = kmap(rcypage);
+		node->next  = super->recycle_cur;
+		node->count = super->recycle_cur_n;
+		kunmap(rcypage);
+
+		super->recycle_cur	= vjentry->bix;
+		super->recycle_room	= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE;
+		rcypage	 = xchg(&super->recycle_node,rcypage);
+		rcyblock = xchg(&super->recycle_block,rcyblock);
+	}
+	else {
+		/* add to existing recycling node */
+		cachefs_block_modify(super,super->recycle_block,&super->recycle_node);
+
+		node = kmap(super->recycle_node);
+		node->leaves[trans->jentry->auxentry] = vjentry->bix;
+		kunmap(super->recycle_node);
+
+		super->recycle_room--;
+		super->recycle_cur_n++;
+		if (super->recycle_room==0)
+			super->recycle_cur_n++;
+	}
+
+	cachefs_trans_commit(trans);
+
+	cachefs_vj_release(super,vjentry);
+	cachefs_put_page(uppage);
+	cachefs_put_page(rcypage);
+	cachefs_block_put(upblock);
+	cachefs_block_put(rcyblock);
+	kleave("");
+	return;
+
+ error_free:
+	cachefs_put_page(uppage);
+	cachefs_put_page(rcypage);
+	cachefs_block_put(upblock);
+	cachefs_block_put(rcyblock);
+	cachefs_trans_put(trans);
+ error:
+	spin_lock(&super->vjnl_lock);
+	list_add_tail(&vjentry->link,&super->vjnl_unallocq);
+	spin_unlock(&super->vjnl_lock);
+	kleave(" [error %d]",ret);
+
+} /* end cachefs_recycle_unallocate_data_block() */

Index: main.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/main.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- main.c	24 Apr 2003 15:14:10 -0000	1.14
+++ main.c	23 May 2003 12:59:21 -0000	1.15
@@ -42,7 +42,7 @@
 	if (ret<0)
 		goto error;
 
-	/* create ourselves a cookie jar */
+	/* create ourselves a cookie jar and a block jar */
 	ret = -ENOMEM;
 	cachefs_cookie_jar = kmem_cache_create("cachefs_cookie_jar",
 					       sizeof(struct cachefs_cookie),
@@ -55,12 +55,25 @@
 		goto error_jnld;
 	}
 
+	cachefs_block_jar = kmem_cache_create("cachefs_block_jar",
+					      sizeof(struct cachefs_block),
+					      0,
+					      SLAB_HWCACHE_ALIGN,
+					      cachefs_block_init_once,
+					      NULL);
+	if (!cachefs_block_jar) {
+		printk(KERN_NOTICE "CacheFS: Failed to allocate a block jar\n");
+		goto error_cookie_jar;
+	}
+
 	ret = cachefs_fs_init();
 	if (ret<0)
-		goto error_cookie_jar;
+		goto error_block_jar;
 
 	return ret;
 
+ error_block_jar:
+	kmem_cache_destroy(cachefs_block_jar);
  error_cookie_jar:
 	kmem_cache_destroy(cachefs_cookie_jar);
  error_jnld:
@@ -80,6 +93,7 @@
 
 	cachefs_fs_exit();
 	kcachefs_jnld_stop();	
+	kmem_cache_destroy(cachefs_block_jar);
 	kmem_cache_destroy(cachefs_cookie_jar);
 
 } /* end cachefs_exit() */

Index: kcachefsd.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/kcachefsd.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- kcachefsd.c	2 Apr 2003 09:12:31 -0000	1.9
+++ kcachefsd.c	23 May 2003 12:59:21 -0000	1.10
@@ -70,23 +70,24 @@
 		add_wait_queue(&super->batch_timer_wq,&myself2);
 
 		for (;;) {
-			/* deal with the server being asked to die */
-			if (super->dmn_die) {
-				remove_wait_queue(&super->dmn_sleepq,&myself);
-				_leave("");
-				complete_and_exit(&super->dmn_dead,0);
-			}
+			/* discard pending signals */
+			discard_my_signals();
 
 			/* see if there's work to be done */
 			if (!super->alloc_node ||
 			    super->layout->bix_unready < super->layout->bix_end ||
 			    test_bit(CACHEFS_SUPER_BATCH_TIMER,&super->flags) ||
-			    test_bit(CACHEFS_SUPER_DO_RECLAIM,&super->flags)
+			    test_bit(CACHEFS_SUPER_DO_RECLAIM,&super->flags) ||
+			    !list_empty(&super->vjnl_unallocq)
 			    )
 				break;
 
-			/* discard pending signals */
-			discard_my_signals();
+			/* deal with the server being asked to die */
+			if (super->dmn_die) {
+				remove_wait_queue(&super->dmn_sleepq,&myself);
+				_leave("");
+				complete_and_exit(&super->dmn_dead,0);
+			}
 
 			schedule();
 
@@ -115,6 +116,10 @@
 				yield();
 			}
 		}
+
+		/* deal with invalid data block unallocation */
+		if (!list_empty(&super->vjnl_unallocq))
+			cachefs_recycle_unallocate_data_block(super);
 
 		/* if there's no next node, then get one */
 		if (!super->alloc_node)

Index: journal.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/journal.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -r1.30 -r1.31
--- journal.c	24 Apr 2003 12:02:33 -0000	1.30
+++ journal.c	23 May 2003 12:59:21 -0000	1.31
@@ -120,7 +120,7 @@
 /*
  * release a reference to a transaction and ultimately free it
  */
-void cachefs_trans_put(struct cachefs_transaction *trans)
+void __cachefs_trans_put(struct cachefs_transaction *trans)
 {
 	unsigned long flags;
 	int loop;
@@ -145,7 +145,7 @@
 
 	for (loop=0; loop<CACHEFS_EFFECTS_PER_TRANS; loop++) {
 		if (trans->effects[loop].block)
-			cachefs_block_put(trans->super,xchg(&trans->effects[loop].block,NULL));
+			cachefs_block_put(xchg(&trans->effects[loop].block,NULL));
 		if (trans->effects[loop].held_page)
 			put_page(trans->effects[loop].held_page);
 	}
@@ -155,20 +155,21 @@
 		kfree(trans->jentry);
 	}
 
-	if (trans->jpage) {
-		dbgpgfree(trans->jpage);
-		page_cache_release(trans->jpage);
-	}
+	dbgpgfree(trans->jpage);
+	page_cache_release(trans->jpage);
 
 	if (trans->jblock)
-		cachefs_block_put(trans->super,xchg(&trans->jblock,NULL));
+		cachefs_block_put(xchg(&trans->jblock,NULL));
+
+	if (trans->vjentry && trans->phase == CACHEFS_TRANS_PREPARING)
+		cachefs_vj_release(trans->super,trans->vjentry);
 
 	atomic_inc(&trans->super->cnt_ujnl_free);
 	dbgfree(trans);
 	kfree(trans);
 
 	_leave(" [dead]");
-} /* end cachefs_trans_put() */
+} /* end __cachefs_trans_put() */
 
 /*****************************************************************************/
 /*
@@ -179,8 +180,6 @@
 				 unsigned offset,
 				 unsigned size)
 {
-	struct cachefs_trans_effect *effect;
-	struct cachefs_super *super = trans->super;
 	int ix = trans->eff_active;
 
 	_enter("%p{efa=%d},{%u},%u,%u",trans,ix,target->bix,offset,size);
@@ -189,12 +188,7 @@
 	BUG_ON(ix>=CACHEFS_EFFECTS_PER_TRANS);
 	BUG_ON(!target);
 
-	effect = &trans->effects[ix];
-	effect->block	= cachefs_block_get(target);
-	effect->start	= offset >> super->sb->s_blocksize_bits;
-	effect->stop	= (size + offset + super->sb->s_blocksize - 1);
-	effect->stop	>>= super->sb->s_blocksize_bits;
-
+	trans->effects[ix].block = cachefs_block_get(target);
 	trans->eff_active++;
 
 } /* end cachefs_trans_affects_block() */
@@ -210,6 +204,7 @@
 {
 	DECLARE_WAITQUEUE(myself,current);
 
+	struct cachefs_ondisc_validity_journal *vjentry;
 	struct cachefs_trans_effect *effect;
 	struct cachefs_super *super = trans->super;
 	cachefs_blockix_t bix;
@@ -347,7 +342,8 @@
 	kjournal("UJNL[%05u] %s,%x %%%hd.%hu i=%x,%u"
 		 " b=%u,%hu a=%u,%hu u=%u,%hu c=%hu A=%u[%hu] R=%u Y=%u:%hu-%hu",
 		 trans->index + super->ujnl_jsof,
-		 cachefs_ondisc_ujnl_marks[trans->jentry->mark],trans->jentry->auxmark,
+		 cachefs_ondisc_ujnl_marks[trans->jentry->mark],
+		 trans->jentry->auxmark,
 		 trans->jentry->batch,		trans->jentry->serial,
 		 trans->jentry->ino,		trans->jentry->index,
 		 trans->jentry->block,		trans->jentry->entry,
@@ -368,6 +364,15 @@
 
 	atomic_inc(&super->cnt_ujnl_mkgr);
 
+	/* record a mark in the validity journal */
+	if (trans->vjentry) {
+		cachefs_block_modify(super,trans->vjentry->vblock,&trans->vjentry->vpage);
+		vjentry = kmap(trans->vjentry->vpage) + trans->vjentry->ventry;
+		vjentry->ino	= trans->vjentry->ino;
+		vjentry->pgnum	= trans->vjentry->pgnum;
+		kunmap(trans->vjentry->vpage);
+	}
+
 	_leave(" = 0");
 	return 0;
 
@@ -383,7 +388,8 @@
 /*****************************************************************************/
 /*
  * commit a transaction
- * - all changes associated with this mark must have been made to the in-memory data before calling
+ * - all changes associated with this mark must have been made to the
+ *   in-memory data before calling
  * - this marks the transaction as being ready to be written to disc
  */
 void cachefs_trans_commit(struct cachefs_transaction *trans)
@@ -394,7 +400,9 @@
 
 	_enter("{%d,%u}",trans->serial,trans->phase);
 
-	cachefs_ujnl_set_phase(trans,CACHEFS_TRANS_COMMITTING,CACHEFS_TRANS_MARKED);
+	cachefs_ujnl_set_phase(trans,
+			       CACHEFS_TRANS_COMMITTING,
+			       CACHEFS_TRANS_MARKED);
 
 	for (loop=0; loop<CACHEFS_EFFECTS_PER_TRANS; loop++)
 		if (trans->effects[loop].held_page)
@@ -408,14 +416,16 @@
 	list_move_tail(&trans->sblink,&super->ujnl_commitq);
 
 	if (!timer_pending(&super->batch_timer)) {
-		super->batch_timer.expires = jiffies + CACHEFS_BATCH_WRITE_TIMER * HZ;
+		super->batch_timer.expires =
+			jiffies + CACHEFS_BATCH_WRITE_TIMER * HZ;
 		add_timer(&super->batch_timer);
 	}
 
 	spin_unlock_irqrestore(&super->ujnl_mk_lock,flags);
 
-	/* release the lock obtained during marking that prevents the batch writer from running
-	 * whilst the in-memory copies of the meta-data are being modified
+	/* release the lock obtained during marking that prevents the batch
+	 * writer from running whilst the in-memory copies of the meta-data are
+	 * being modified
 	 */
 	up_read(&super->batch_ctrl_sem);
 
@@ -449,14 +459,15 @@
 
 	clear_bit(CACHEFS_SUPER_BATCH_TIMER,&super->flags);
 
-	/* create a barrier against new transactions being marked and thus further modifications
-	 * been made to the metadata we're about to write out
-	 */
+	/* create a barrier against new transactions being marked and thus
+	 * further modifications been made to the metadata we're about to write
+	 * out */
 	down_write(&super->batch_ctrl_sem);
 
 	BUG_ON(!list_empty(&super->ujnl_markq));
 
-	/* mark all modified journalling and data blocks for writeback (and COW for data too) */
+	/* mark all modified journalling and data blocks for writeback (and COW
+	 * for data too) */
 	list_for_each(_p,&super->ujnl_commitq) {
 		trans = list_entry(_p,struct cachefs_transaction,sblink);
 
@@ -500,7 +511,8 @@
 		       block->bix,block->flags,block->page,block->writeback);
 	}
 
-	/* decide where to put the BATCH and ACK marks and what the data for it is */
+	/* decide where to put the BATCH and ACK marks and what the data for it
+	 * is */
 	jstop = super->ujnl_head;
 	super->ujnl_head = (jstop+2) & (CACHEFS_ONDISC_UJNL_NUMENTS-1);
 
@@ -526,30 +538,32 @@
 
 	up_write(&super->batch_ctrl_sem);
 
-	/* write to the update journal with a barrier and then write the data */
+	/* write to the update journal with a barrier and then write the
+	 * data */
 	cachefs_trans_batch_write_ujnl(super,jstop);
 
-	ajentry->mark		= CACHEFS_ONDISC_UJNL_BATCH;
-	cachefs_trans_batch_write_marker(super,jstop,ajentry);
+	ajentry->mark = CACHEFS_ONDISC_UJNL_BATCH;
 
+	cachefs_trans_batch_write_marker(super,jstop,ajentry);
 	cachefs_trans_batch_write_data(super);
-
 	cachefs_trans_batch_process_written_blocks(super,2);
 
 	/* polish off with an ACK */
 	jstop = (jstop+1) & (CACHEFS_ONDISC_UJNL_NUMENTS-1);
 
-	ajentry->mark		= CACHEFS_ONDISC_UJNL_ACK;
+	ajentry->mark = CACHEFS_ONDISC_UJNL_ACK;
 	ajentry->serial++;
 
 	cachefs_trans_batch_write_ack(super,jstop,ajentry);
 	kfree(ajentry);
 
-	super->ujnl_tail = (super->ujnl_tail+2) & (CACHEFS_ONDISC_UJNL_NUMENTS-1);
+	super->ujnl_tail =
+		(super->ujnl_tail+2) & (CACHEFS_ONDISC_UJNL_NUMENTS-1);
 
 	/* clean up the transactions that we've now written */
 	while (!list_empty(&super->ujnl_writeq)) {
-		trans = list_entry(super->ujnl_writeq.next,struct cachefs_transaction,sblink);
+		trans = list_entry(super->ujnl_writeq.next,
+				   struct cachefs_transaction,sblink);
 		list_del_init(&trans->sblink);
 
 		cachefs_ujnl_set_phase(trans,
@@ -585,18 +599,20 @@
 	jepp	= 1 << jepp_b;
 
 	do {
-		/* work out how many pages to write to the journal in this chunk */
+		/* work out how many pages to write to the journal in this
+		 * chunk */
 		bix = super->ujnl_tail >> jepp_b;
 
 		chunk = jstop;
-		if (jstop<super->ujnl_tail)
+		if (jstop < super->ujnl_tail)
 			chunk = CACHEFS_ONDISC_UJNL_NUMENTS;
 
 		npages = (chunk + jepp - 1) >> jepp_b;
 		npages -= bix;
 
 		tmp = bio_get_nr_vecs(super->sb->s_bdev);
-		if (npages>tmp) npages = tmp;
+		if (npages > tmp)
+			npages = tmp;
 
 		bix += super->layout->bix_ujournal;
 
@@ -605,7 +621,8 @@
 		       chunk + super->ujnl_jsof - 1,
 		       npages,bix);
 
-		/* allocate a BIO big enough to send as many as possible in one go */
+		/* allocate a BIO big enough to send as many as possible in one
+		 * go */
 		bio = NULL;
 		tmp = npages;
 		for (;;) {
@@ -642,9 +659,10 @@
 		while (chunk > super->ujnl_tail) {
 			unsigned offset, len;
 
-			if (cachefs_block_find(super,bix,&block)<0) BUG();
+			if (cachefs_block_find(super,bix,&block) < 0)
+				BUG();
 
-			offset = (super->ujnl_tail & (jepp-1));
+			offset = (super->ujnl_tail & (jepp - 1));
 
 			len = chunk - super->ujnl_tail;
 			if (len > jepp-offset)
@@ -653,15 +671,15 @@
 			_debug("jpage: pg=%p b=%05u o=%03hx l=%03hx",
 			       block->page,
 			       block->bix,
-			       offset<<super->sb->s_blocksize_bits,
-			       len<<super->sb->s_blocksize_bits);
+			       offset << super->sb->s_blocksize_bits,
+			       len << super->sb->s_blocksize_bits);
 
 			tmp = bio_add_page(bio,
 					   block->page,
-					   len<<super->sb->s_blocksize_bits,
-					   offset<<super->sb->s_blocksize_bits);
+					   len << super->sb->s_blocksize_bits,
+					   offset << super->sb->s_blocksize_bits);
 
-			cachefs_block_put(super,block);
+			cachefs_block_put(block);
 			block = NULL;
 			if (!tmp)
 				break;
@@ -680,7 +698,7 @@
 		else
 			submit_bio(WRITE,bio);
 
-	} while (super->ujnl_tail!=jstop);
+	} while (super->ujnl_tail != jstop);
 
 	_leave("");
 	return;
@@ -870,7 +888,7 @@
 
 	submit_bio(WRITE | (1 << BIO_RW_BARRIER),bio);
 
-	cachefs_block_put(super,jblock);
+	cachefs_block_put(jblock);
 	_leave("");
 	return;
 
@@ -964,7 +982,7 @@
 
 	dbgpgfree(jpage);
 	page_cache_release(jpage);
-	cachefs_block_put(super,jblock);
+	cachefs_block_put(jblock);
 	_leave("");
 	return;
 

Index: io.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/io.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- io.c	24 Apr 2003 12:02:34 -0000	1.12
+++ io.c	23 May 2003 12:59:21 -0000	1.13
@@ -130,12 +130,12 @@
 {
 	struct page *page;
 
-	write_lock(&mapping->page_lock);
+	spin_lock(&mapping->page_lock);
 
 	list_splice_init(&mapping->dirty_pages,&mapping->io_pages);
 
 	if (list_empty(&mapping->dirty_pages) && list_empty(&mapping->io_pages)) {
-		write_unlock(&mapping->page_lock);
+		spin_unlock(&mapping->page_lock);
 		return 0;
 	}
 
@@ -148,7 +148,7 @@
 		printk("  - pg %lu\n",page->index);
 	}
 
-	write_unlock(&mapping->page_lock);
+	spin_unlock(&mapping->page_lock);
 
 	BUG();
 	return -EIO;
@@ -190,7 +190,7 @@
 
 	_enter("");
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_KERNEL);
+	ret = cachefs_page_get_private(page,&pageio,GFP_KERNEL);
 	if (ret<0)
 		goto error;
 
@@ -198,8 +198,6 @@
 	if (ret<0)
 		goto error;
 
-	SetPageMappedToDisk(page);
-
 	/* dispatch the outstanding BIO if the pages are not adjacent */
 	if (*_bio && *last_block_in_bio != page->index-1)
 		cachefs_vio_io_bio_submit(_bio);
@@ -282,7 +280,7 @@
 
 	_enter(",{%lu}",page->index);
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_KERNEL);
+	ret = cachefs_page_get_private(page,&pageio,GFP_KERNEL);
 	if (ret<0)
 		return ret;
 
@@ -290,8 +288,6 @@
 	if (ret<0)
 		return ret;
 
-	SetPageMappedToDisk(page);
-
 	ret = cachefs_vio_io_alloc(inode->i_sb,page->index,1,GFP_KERNEL,&bio);
 	if (ret==0) {
 		if (!bio_add_page(bio,page,PAGE_SIZE,0)) BUG();
@@ -320,7 +316,6 @@
 
 	pageio = (struct cachefs_page*) page->private;
 	block = xchg(&pageio->mapped_block,NULL);
-	ClearPageMappedToDisk(page);
 	pageio->flags = 0;
 
 	if (block) {
@@ -332,7 +327,7 @@
 			BUG();
 		}
 
-		cachefs_block_put(page->mapping->host->i_sb->s_fs_info,block);
+		cachefs_block_put(block);
 	}
 
 	/*
@@ -362,12 +357,11 @@
 		pageio = (struct cachefs_page*) page->private;
 		page->private = 0;
 		ClearPagePrivate(page);
-		ClearPageMappedToDisk(page);
 
 		if (pageio) {
 			block = xchg(&pageio->mapped_block,NULL);
 			if (block)
-				cachefs_block_put(page->mapping->host->i_sb->s_fs_info,block);
+				cachefs_block_put(block);
 			dbgfree(pageio);
 			kfree(pageio);
 		}

Index: interface.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/interface.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- interface.c	29 Apr 2003 15:24:06 -0000	1.5
+++ interface.c	23 May 2003 12:59:21 -0000	1.6
@@ -12,6 +12,13 @@
 #include <linux/module.h>
 #include "cachefs-int.h"
 
+struct cachefs_io_end {
+	cachefs_rw_complete_t	func;
+	void			*data;
+	void			*cookie_data;
+	struct cachefs_block	*block;
+};
+
 LIST_HEAD(cachefs_netfs_list);
 LIST_HEAD(cachefs_cache_list);
 DECLARE_RWSEM(cachefs_addremove_sem);
@@ -189,7 +196,18 @@
 	list_del_init(&super->mnt_link);
 	up_write(&cachefs_addremove_sem);
 
-	/* we now have to destroy all the active inodes pertaining to this superblock */
+	/* mark all inodes as being withdrawn */
+	spin_lock(&super->ino_list_lock);
+	list_for_each_entry(inode,&super->ino_list,super_link) {
+		set_bit(CACHEFS_ACTIVE_INODE_WITHDRAWN,&inode->flags);
+	}
+	spin_unlock(&super->ino_list_lock);
+
+	/* mark all active blocks as being withdrawn */
+	cachefs_block_withdraw(super);
+
+	/* we now have to destroy all the active inodes pertaining to this
+	 * superblock */
 	spin_lock(&super->ino_list_lock);
 	while (!list_empty(&super->ino_list)) {
 		inode = list_entry(super->ino_list.next,struct cachefs_inode,super_link);
@@ -209,6 +227,65 @@
 
 /*****************************************************************************/
 /*
+ * withdraw an inode from active service
+ * - need to break the links to a cached object cookie
+ * - called under two situations:
+ *   (1) recycler decides to reclaim an in-use inode
+ *   (2) a cache is unmounted
+ * - have to take care as the cookie can be being relinquished by the netfs simultaneously
+ * - the active inode is pinned by the caller holding a refcount on it
+ */
+void cachefs_withdraw_inode(struct cachefs_inode *inode)
+{
+	struct cachefs_search_result *srch;
+	struct cachefs_cookie *cookie, *xcookie = NULL;
+
+	kenter("{%lu}",inode->vfs_inode.i_ino);
+
+	/* first of all we have to break the links between the inode and the cookie
+	 * - we have to hold both semaphores BUT we have to get the cookie sem FIRST
+	 */
+	down(&inode->vfs_inode.i_sem);
+
+	cookie = inode->cookie;
+	if (cookie) {
+		atomic_inc(&cookie->usage); /* pin the cookie */
+
+		up(&inode->vfs_inode.i_sem); /* re-order the locks to avoid deadlock */
+		down_write(&cookie->sem);
+		down(&inode->vfs_inode.i_sem);
+
+		list_del_init(&inode->cookie_link);
+
+		xcookie = inode->cookie;
+		inode->cookie = NULL;
+
+		list_for_each_entry(srch,&cookie->search_results,link) {
+			if (srch->super==inode->vfs_inode.i_sb->s_fs_info)
+				goto found_srch;
+		}
+	found_srch:
+		list_del(&srch->link);
+		kfree(srch);
+
+		up_write(&cookie->sem);
+	}
+
+	up(&inode->vfs_inode.i_sem);
+
+	if (cookie)
+		cachefs_cookie_put(cookie);
+
+	if (xcookie) {
+		cachefs_cookie_put(xcookie);
+		cachefs_iput(inode);
+	}
+
+	kleave("");
+} /* end cachefs_withdraw_inode() */
+
+/*****************************************************************************/
+/*
  * search for representation of an object in its parent cache
  * - the cookie must be locked by the caller
  * - returns -ENODATA if the object or one of its ancestors doesn't exist
@@ -627,67 +704,6 @@
 
 /*****************************************************************************/
 /*
- * withdraw an inode from active service
- * - need break the links to a cached object cookie
- * - called under two situations:
- *   (1) recycler decides to reclaim an in-use inode
- *   (2) a cache is unmounted
- * - have to take care as the cookie can be being relinquished by the netfs simultaneously
- * - the active inode is pinned by the caller holding a refcount on it
- */
-void cachefs_withdraw_inode(struct cachefs_inode *inode)
-{
-	struct cachefs_search_result *srch;
-	struct cachefs_cookie *cookie, *xcookie = NULL;
-
-	kenter("{%lu}",inode->vfs_inode.i_ino);
-
-	set_bit(CACHEFS_ACTIVE_INODE_WITHDRAWN,&inode->flags);
-
-	/* first of all we have to break the links between the inode and the cookie
-	 * - we have to hold both semaphores BUT we have to get the cookie sem FIRST
-	 */
-	down(&inode->vfs_inode.i_sem);
-
-	cookie = inode->cookie;
-	if (cookie) {
-		atomic_inc(&cookie->usage); /* pin the cookie */
-
-		up(&inode->vfs_inode.i_sem); /* re-order the locks to avoid deadlock */
-		down_write(&cookie->sem);
-		down(&inode->vfs_inode.i_sem);
-
-		list_del_init(&inode->cookie_link);
-
-		xcookie = inode->cookie;
-		inode->cookie = NULL;
-
-		list_for_each_entry(srch,&cookie->search_results,link) {
-			if (srch->super==inode->vfs_inode.i_sb->s_fs_info)
-				goto found_srch;
-		}
-	found_srch:
-		list_del(&srch->link);
-		kfree(srch);
-
-		up_write(&cookie->sem);
-	}
-
-	up(&inode->vfs_inode.i_sem);
-
-	if (cookie)
-		cachefs_cookie_put(cookie);
-
-	if (xcookie) {
-		cachefs_cookie_put(xcookie);
-		cachefs_iput(inode);
-	}
-
-	kleave("");
-} /* end cachefs_withdraw_active_inode() */
-
-/*****************************************************************************/
-/*
  * see if the netfs definition matches
  */
 static cachefs_match_val_t cachefs_fsdef_index_match(void *target, const void *entry)
@@ -748,37 +764,6 @@
 
 /*****************************************************************************/
 /*
- * destroy an active inode
- * - called with super->ino_tree_lock held
- */
-#if 0 // TODO: remove
-static void __cachefs_put_active_inode(struct cachefs_active_inode *active)
-{
-	struct cachefs_super *super;
-
-	kenter("");
-
-	/* remove from the cache's inode tree */
-	super = active->super;
-
-	if (active->ino && !test_bit(CACHEFS_ACTIVE_INODE_WITHDRAWN,&active->flags))
-		rb_erase(&active->node,&super->ino_tree);
-
-	spin_unlock(&super->ino_tree_lock);
-
-	if (active->storage) {
-		BUG();
-		cachefs_block_put(active->super,active->storage);
-	}
-
-	kfree(active);
-
-	kleave("");
-} /* end __cachefs_put_active_inode() */
-#endif
-
-/*****************************************************************************/
-/*
  * initialise an cookie jar slab element prior to any use
  */
 void cachefs_cookie_init_once(void *_cookie, kmem_cache_t *cachep, unsigned long flags)
@@ -796,92 +781,251 @@
 
 /*****************************************************************************/
 /*
- * see if a page is cached in a block attached to the cookie
- * - if it is:
- *   - the page cookie will have been changed so that mapped_block refers to the block
- *   - a BIO will have been dispatched to load the page with end_io as the completion func
- *     - end_io can be NULL, in which case a default function will just unlock the page
- *   - 0 will be returned
- * - if it is not:
- *   - no cache space will be allocated
+ * handle a page having been read
+ */
+static int cachefs_page_read(struct bio *bio, unsigned int bytes_done, int error)
+{
+	struct cachefs_io_end *end_io = bio->bi_private;
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+	kenter("%p{%u},%u,%d",bio,bio->bi_size,bytes_done,error);
+
+	if (bio->bi_size>0) {
+		_leave(" = 1");
+		return 1;
+	}
+
+	for (; bvec>=bio->bi_io_vec; bvec--)
+		end_io->func(end_io->cookie_data,bvec->bv_page,end_io->data);
+
+	cachefs_block_put(end_io->block);
+	kfree(end_io);
+	bio_put(bio);
+	_leave(" = 0");
+	return 0;
+} /* end cachefs_page_read() */
+
+/*****************************************************************************/
+/*
+ * read a page from the cache or allocate a block in which to store it
+ * - if the cookie is not backed by a file:
+ *   - -ENOBUFS will be returned and nothing more will be done
+ * - else if the page is backed by a block in the cache:
+ *   - a read will be started which will call end_io_data on completion
+ *   - the wb-journal will be searched for an entry pertaining to this block
+ *     - if an entry is found:
+ *       - 1 will be returned
+ *       else
+ *       - 0 will be returned
+ * - else if the page is unbacked:
+ *   - a block will be allocated and attached
+ *   - the v-journal will be marked to note the block contains invalid data
  *   - -ENODATA will be returned
- * - the page must be locked before calling
- * - netfs->ops->get_page_cookie() will be called to gain access to the page cookie
  */
-#if 0
-int cachefs_read_page(struct cachefs_cookie *cookie, struct page *page, bio_end_io_t end_io)
+int cachefs_read_or_alloc_page(struct cachefs_cookie *cookie,
+			       struct page *page,
+			       cachefs_rw_complete_t end_io_func,
+			       void *end_io_data,
+			       unsigned long gfp)
 {
+	struct cachefs_io_end *end_io = NULL;
+	struct cachefs_inode *inode;
 	struct cachefs_block *block;
 	struct cachefs_page *pageio;
-	struct super_block *sb;
-	unsigned long flags;
-	struct bio *bio;
+	struct bio *bio = NULL;
 	int ret;
 
-	kenter("{%s},{%lu}",netfs->name,page->index);
+	kenter("%p,{%lu},",cookie,page->index);
 
-	/* not supposed to use this for indexes */
-	BUG_ON(cookie->idef);
+	if (!cookie) {
+		kleave(" -ENOBUFS [no cookie]");
+		return -ENOBUFS; /* no actual cookie */
+	}
+
+	BUG_ON(cookie->idef); /* not supposed to use this for indexes */
 
 	ret = cookie->netfs->ops->get_page_cookie(page,&pageio);
-	if (ret<0)
+	if (ret<0) {
+		kleave(" = %d",ret);
+		return ret;
+	}
+
+	/* prevent the file from being uncached whilst we access it */
+	block = NULL;
+	down_read(&cookie->sem);
+
+	ret = -ENOBUFS;
+	if (list_empty(&cookie->backing_inodes))
 		goto error;
 
-	/* protect against cache removal */
-	read_lock_irqsave(&pageio->lock,flags);
+	/* handle the case of there already being a mapping,
+	 * - must protect against cache removal
+	 */
+	kdebug("check mapping");
+	read_lock(&pageio->lock);
 
 	block = pageio->mapped_block;
-	if (block) {
-		if (!test_bit(CACHEFS_SUPER_WITHDRAWN,&block->super->flags))
-			cachefs_block_get(block);
-		else
-			block = NULL;
+	if (block && !test_bit(CACHEFS_SUPER_WITHDRAWN,&block->super->flags))
+		goto available;
+
+	read_unlock(&pageio->lock);
+	block = NULL;
+
+	/* we don't know of a backing page, but there may be one recorded on
+	 * disc... and if there isn't we'll request one be allocated */
+	kdebug("igrab");
+	inode = cachefs_igrab(list_entry(cookie->backing_inodes.next,
+					 struct cachefs_inode,
+					 cookie_link));
+	ret = -ENOBUFS;
+	if (!inode)
+		goto error;
+
+	kdebug("get block");
+	down(&inode->vfs_inode.i_sem);
+	ret = cachefs_get_block(&inode->vfs_inode,page,pageio,1);
+	if (ret<0)
+		goto error_i;
+
+	if (!test_and_clear_bit(CACHEFS_PAGE_NEW,&pageio->flags)) {
+		read_lock(&pageio->lock);
+
+		block = pageio->mapped_block;
+		if (block && !test_bit(CACHEFS_SUPER_WITHDRAWN,&block->super->flags))
+			goto available_i;
+
+		read_unlock(&pageio->lock);
+		block = NULL;
+		ret = -ENOBUFS;
+		goto error_i;
 	}
 
-	read_unlock_irqrestore(&pageio->lock,flags);
+	up(&inode->vfs_inode.i_sem);
+	cachefs_iput(inode);
 
-	ret = -ENODATA;
-	if (!block)
+	write_lock(&pageio->mapped_block->ref_lock);
+	pageio->mapped_block->ref = pageio;
+	write_unlock(&pageio->mapped_block->ref_lock);
+
+	/* new block allocated, but no data */
+	kdebug("no data [bix=%u]",pageio->mapped_block->bix);
+	up_read(&cookie->sem);
+	kleave(" = -ENODATA");
+	return -ENODATA;
+
+	/* load the contents of the block into the specified page */
+ available_i:
+	kdebug("available_i");
+	up(&inode->vfs_inode.i_sem);
+	cachefs_iput(inode);
+ available:
+	kdebug("available");
+	cachefs_block_get(block);
+	read_unlock(&pageio->lock);
+
+	ret = -ENOMEM;
+	end_io = kmalloc(sizeof(*end_io),gfp);
+	if (!end_io)
 		goto error;
 
-	/* generate a BIO to read the page */
+	end_io->func		= end_io_func;
+	end_io->data		= end_io_data;
+	end_io->cookie_data	= cookie->netfs_data;
+	end_io->block		= block;
+
 	ret = -ENOMEM;
-	bio = bio_alloc(GFP_KERNEL,1);
+	bio = bio_alloc(gfp,1);
 	if (!bio)
 		goto error;
 
-	sb = block->super->sb;
+	bio->bi_bdev	= block->super->sb->s_bdev;
+	bio->bi_private	= end_io;
+	bio->bi_end_io	= cachefs_page_read;
+	bio->bi_sector	= block->bix;
+	bio->bi_sector	<<= PAGE_SHIFT - block->super->sb->s_blocksize_bits;
+
+	if (!bio_add_page(bio,page,PAGE_SIZE,0))
+		BUG();
 
-	bio->bi_bdev	= sb->s_bdev;
-	bio->bi_sector	= block->bix << (PAGE_SHIFT - sb->s_blocksize_bits);
-	bio->bi_end_io	= cachefs_io_end_io_read;
-	//dump_bio(bio,1);
 	submit_bio(READ,bio);
-	ret = 0;
 
+	kdebug("done");
+	up_read(&cookie->sem);
+
+	write_lock(&block->ref_lock);
+	block->ref = pageio;
+	write_unlock(&block->ref_lock);
+
+	kleave(" = 0");
+	return 0;
+
+ error_i:
+	kdebug("error_i");
+	up(&inode->vfs_inode.i_sem);
+	cachefs_iput(inode);
  error:
+	kdebug("error");
+	up_read(&cookie->sem);
+	if (block)	cachefs_block_put(block);
+	if (bio)	bio_put(bio);
+	if (end_io)	kfree(end_io);
 	kleave(" = %d",ret);
 	return ret;
-} /* end cachefs_read_page() */
-#endif
+} /* end cachefs_read_or_alloc_page() */
+
+EXPORT_SYMBOL(cachefs_read_or_alloc_page);
 
 /*****************************************************************************/
 /*
- * write a page to the cache
+ * handle a page having been written
  */
-#if 0
-int __cachefs_write_page(struct cachefs_cookie *cookie,
-			 struct page *page,
-			 cachefs_rw_complete_t end_io,
-			 void *end_io_data)
+static int cachefs_page_written(struct bio *bio, unsigned int bytes_done, int error)
+{
+	struct cachefs_io_end *end_io = bio->bi_private;
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+	kenter("%p{%u},%u,%d",bio,bio->bi_size,bytes_done,error);
+
+	if (bio->bi_size>0) {
+		_leave(" = 1");
+		return 1;
+	}
+
+	for (; bvec>=bio->bi_io_vec; bvec--)
+		end_io->func(end_io->cookie_data,bvec->bv_page,end_io->data);
+
+	cachefs_block_put(end_io->block);
+	kfree(end_io);
+	bio_put(bio);
+	_leave(" = 0");
+	return 0;
+} /* end cachefs_page_written() */
+
+/*****************************************************************************/
+/*
+ * request a page be stored in the cache
+ * - this request may be ignored if no cache block is currently attached, in which case it
+ *   - returns -ENOBUFS
+ * - if a cache block was already allocated:
+ *   - the page cookie will be updated to reflect the block selected
+ *   - a BIO will have been dispatched to write the page with end_io as the completion func
+ *     - end_io can be NULL, in which case a default function will just clear the writeback bit
+ *   - if a page is associated with a v-journal entry, that entry will be erased
+ *   - returns 0
+ */
+int cachefs_write_page(struct cachefs_cookie *cookie,
+		       struct page *page,
+		       cachefs_rw_complete_t end_io_func,
+		       void *end_io_data,
+		       unsigned long gfp)
 {
+	struct cachefs_io_end *end_io = NULL;
 	struct cachefs_block *block;
 	struct cachefs_page *pageio;
-	struct super_block *sb;
-	struct bio *bio;
+	struct bio *bio = NULL;
 	int ret;
 
-	kenter("%p,{%lu},",page->index);
+	kenter("%p,{%lu},",cookie,page->index);
 
 	if (!cookie) {
 		kleave(" -ENOBUFS [no cookie]");
@@ -891,33 +1035,189 @@
 	BUG_ON(cookie->idef); /* not supposed to use this for indexes */
 
 	ret = cookie->netfs->ops->get_page_cookie(page,&pageio);
-	if (ret<0)
-		goto error;
+	if (ret<0) {
+		kleave(" = %d",ret);
+		return ret;
+	}
 
 	/* prevent the file from been uncached whilst we deal with it */
 	down_read(&cookie->sem);
+	read_lock(&pageio->lock);
+
+	block = pageio->mapped_block;
+	if (block && !test_bit(CACHEFS_SUPER_WITHDRAWN,&block->super->flags))
+		goto write;
+
+	read_unlock(&pageio->lock);
+	up_read(&cookie->sem);
+	kleave(" = -ENOBUFS");
+	return -ENOBUFS;
+
+ write:
+	kdebug("write [bix=%u]",block->bix);
+	cachefs_block_get(block);
+	read_unlock(&pageio->lock);
+
+	ret = -ENOMEM;
+	end_io = kmalloc(sizeof(*end_io),gfp);
+	if (!end_io)
+		goto error;
+
+	end_io->func		= end_io_func;
+	end_io->data		= end_io_data;
+	end_io->cookie_data	= cookie->netfs_data;
+	end_io->block		= block;
+
+	ret = -ENOMEM;
+	bio = bio_alloc(gfp,1);
+	if (!bio)
+		goto error;
+
+	bio->bi_bdev	= block->super->sb->s_bdev;
+	bio->bi_private	= end_io;
+	bio->bi_end_io	= cachefs_page_written;
+	bio->bi_sector	= block->bix;
+	bio->bi_sector	<<= PAGE_SHIFT - block->super->sb->s_blocksize_bits;
+
+	kdebug("%u,%u,%llu",
+	       block->bix,block->super->sb->s_blocksize_bits,bio->bi_sector);
+
+	if (!bio_add_page(bio,page,PAGE_SIZE,0))
+		BUG();
+
+	dump_bio(bio,1);
+	submit_bio(WRITE,bio);
+
+	up_read(&cookie->sem);
+	kleave(" = 0");
+	return 0;
+
+ error:
+	kdebug("error");
+	cachefs_block_put(block);
+	up_read(&cookie->sem);
+	if (bio)
+		bio_put(bio);
+	if (end_io)
+		kfree(end_io);
+	kleave(" = %d",ret);
+	return ret;
+} /* end cachefs_write_page() */
+
+EXPORT_SYMBOL(cachefs_write_page);
+
+/*****************************************************************************/
+/*
+ * remove a page from the cache
+ * - if the block backing the page still has a vjentry then the block will be
+ *   recycled
+ */
+void __cachefs_uncache_page(struct cachefs_cookie *cookie, struct page *page)
+{
+	struct cachefs_block *block, *xblock;
+	struct cachefs_page *pageio;
+	int ret;
+
+	kenter(",{%lu}",page->index);
 
-	if (list_empty(&cookie->active_inodes)) {
-		up_read(&cookie->sem);
-		kleave(" -ENOBUFS [no inode]");
-		return -ENOBUFS;
+	if (!cookie) {
+		kleave(" [no cookie]");
+		return;
 	}
 
-	/* handle the case of there already being a mapping,
-	 * - must protect against cache removal
-	 */
-	read_lock(&pageio->lock);
+	BUG_ON(cookie->idef); /* not supposed to use this for indexes */
 
+	ret = cookie->netfs->ops->get_page_cookie(page,&pageio);
+	if (ret<0) {
+		kleave(" [get_page_cookie() =:%d]",ret);
+		return;
+	}
+
+	/* un-cross-link the page cookie and the block */
+	xblock = NULL;
+	write_lock(&pageio->lock);
 	block = pageio->mapped_block;
-	if (block && !test_bit(CACHEFS_SUPER_WITHDRAWN,&block->super->flags)) {
-		cachefs_block_get(block);
-		read_unlock(&pageio->lock);
+	if (block) {
+		pageio->mapped_block = NULL; /* pin the block */
+		write_unlock(&pageio->lock);
 
+		/* locking order needs to be reversed */
+		write_lock(&block->ref_lock);
+		write_lock(&pageio->lock);
+		block->ref = NULL;
+		write_unlock(&block->ref_lock);
+	}
+	write_unlock(&pageio->lock);
 
+	if (block) {
+		if (block->vjentry)
+			cachefs_vj_cancel(block->vjentry);
+		cachefs_block_put(block);
 	}
 
-	read_unlock(&pageio->lock);
+	kleave("");
+	return;
+} /* end __cachefs_uncache_page() */
+
+EXPORT_SYMBOL(__cachefs_uncache_page);
 
+/*****************************************************************************/
+/*
+ * try and read a page from the cache
+ */
+#if 0
+int cachefs_read_page(struct cachefs_cookie *cookie, struct page *page, bio_end_io_t end_io)
+{
+	struct cachefs_block *block;
+	struct cachefs_page *pageio;
+	struct super_block *sb;
+	unsigned long flags;
+	struct bio *bio;
+	int ret;
 
-} /* end __cachefs_write_page() */
+	kenter("{%s},{%lu}",netfs->name,page->index);
+
+	/* not supposed to use this for indexes */
+	BUG_ON(cookie->idef);
+
+	ret = cookie->netfs->ops->get_page_cookie(page,&pageio);
+	if (ret<0)
+		goto error;
+
+	/* protect against cache removal */
+	read_lock_irqsave(&pageio->lock,flags);
+
+	block = pageio->mapped_block;
+	if (block) {
+		if (!test_bit(CACHEFS_SUPER_WITHDRAWN,&block->super->flags))
+			cachefs_block_get(block);
+		else
+			block = NULL;
+	}
+
+	read_unlock_irqrestore(&pageio->lock,flags);
+
+	ret = -ENODATA;
+	if (!block)
+		goto error;
+
+	/* generate a BIO to read the page */
+	ret = -ENOMEM;
+	bio = bio_alloc(GFP_KERNEL,1);
+	if (!bio)
+		goto error;
+
+	sb = block->super->sb;
+
+	bio->bi_bdev	= sb->s_bdev;
+	bio->bi_sector	= block->bix << (PAGE_SHIFT - sb->s_blocksize_bits);
+	bio->bi_end_io	= cachefs_io_end_io_read;
+	//dump_bio(bio,1);
+	submit_bio(READ,bio);
+	ret = 0;
+
+ error:
+	kleave(" = %d",ret);
+	return ret;
+} /* end cachefs_read_page() */
 #endif

Index: index.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/index.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- index.c	29 Apr 2003 15:24:06 -0000	1.16
+++ index.c	23 May 2003 12:59:21 -0000	1.17
@@ -172,7 +172,7 @@
 	iinode = data;
 	kenter("{%lu},%p{%lu}",iinode->vfs_inode.i_ino,page,page->index);
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_KERNEL);
+	ret = cachefs_page_get_private(page,&pageio,GFP_KERNEL);
 	if (ret<0)
 		return ret;
 
@@ -240,6 +240,8 @@
 	/* we may need to extend the index file */
 	if (newentry==UINT_MAX) {
 		pgnum = iinode->vfs_inode.i_size >> PAGE_SHIFT;
+
+		/* we need to get the new contents for this block ready in advance */
 		page = read_cache_page(iinode->vfs_inode.i_mapping,pgnum,
 				       cachefs_index_preinit_page,iinode);
 		dbgpgalloc(page);
@@ -251,7 +253,8 @@
 		iinode->vfs_inode.i_size += PAGE_SIZE;
 
 		/* make the extension */
-		ret = cachefs_get_block(&iinode->vfs_inode,page,1);
+		ret = cachefs_get_block(&iinode->vfs_inode,page,
+					__cachefs_page_get_private(page),1);
 		if (ret<0) {
 			iinode->vfs_inode.i_size -= PAGE_SIZE;
 			goto error2;
@@ -365,9 +368,12 @@
 	ino_offset = ino % super->istorage->index_epp;
 	ino_offset <<= super->layout->storage_bits;
 
-	cachefs_trans_affects_page(trans,ixpage,offset,index->index_esize);
+	cachefs_trans_affects_page(trans,__cachefs_page_get_private(ixpage),offset,
+				   index->index_esize);
+	cachefs_trans_affects_page(trans,__cachefs_page_get_private(inopage),ino_offset,
+				   super->layout->storage_size);
+
 	cachefs_trans_affects_inode(trans,index);
-	cachefs_trans_affects_page(trans,inopage,ino_offset,super->layout->storage_size);
 	cachefs_trans_affects_inode(trans,super->istorage);
 
 	ret = cachefs_trans_mark(trans);

Index: dump-journal.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/dump-journal.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- dump-journal.c	2 Apr 2003 09:12:31 -0000	1.8
+++ dump-journal.c	23 May 2003 12:59:22 -0000	1.9
@@ -56,6 +56,7 @@
 	BRIGHT	"InoRcming"	NORMAL,
 	BRIGHT	"DataAlloc"	NORMAL,
 	BRIGHT	"DataWrite"	NORMAL,
+	BRIGHT	"DataUnalc"	NORMAL,
 	BRIGHT	"IndrAlloc"	NORMAL,
 	BRIGHT	"IndrFree "	NORMAL,
 	BRIGHT	"IndexExtn"	NORMAL,
@@ -110,10 +111,10 @@
 		printf("uninitialised CacheFS filesystem");
 
 		if (super->bix_ujournal!=2) exit(0);
-		if (super->bix_wbjournal > super->bix_ujournal+65536/512) exit(0);
+		if (super->bix_vjournal > super->bix_ujournal+65536/512) exit(0);
 	}
 
-	numujnl = super->bix_wbjournal - super->bix_ujournal;
+	numujnl = super->bix_vjournal - super->bix_ujournal;
 	numujnl *= 4096;
 
 	jsof = super->bix_ujournal * (4096 / super->ujnl_rsize);

Index: cachetest-main.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/cachetest-main.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- cachetest-main.c	24 Apr 2003 12:02:34 -0000	1.8
+++ cachetest-main.c	23 May 2003 12:59:22 -0000	1.9
@@ -56,20 +56,23 @@
 static void cell_update(void *source, void *entry);
 
 static struct cachefs_index_def cachetest_cell_index_def = {
-	.name		= "cell_ix",
-	.data_size	= 124,
-	.keys[0]	= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-	.match		= cell_match,
-	.update		= cell_update,
+	.name			= "cell_ix",
+	.data_size		= 124,
+	.keys[0]		= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
+	.match			= cell_match,
+	.update			= cell_update,
 };
 
+static int afs_get_page_cookie(struct page *page, struct cachefs_page **_page_cookie);
+
 static struct cachefs_netfs_operations cachetest_ops = {
+	.get_page_cookie	= afs_get_page_cookie,
 };
 
 static struct cachefs_netfs cachetest = {
-	.name		= "cachetest",
-	.version	= 0,
-	.ops		= &cachetest_ops,
+	.name			= "cachetest",
+	.version		= 0,
+	.ops			= &cachetest_ops,
 };
 
 /* AFS cell
@@ -118,6 +121,21 @@
 	.fid		= 0x1234abcd,
 };
 
+static struct page *afs_data_page;
+
+static struct cachefs_page afs_data_page_cookie = {
+	.lock		= RW_LOCK_UNLOCKED,
+};
+
+static struct page *afs_data_page2;
+
+static struct cachefs_page afs_data_page2_cookie = {
+	.lock		= RW_LOCK_UNLOCKED,
+};
+
+static void afs_page_cache_write_complete(void *cookie_data, struct page *page, void *data);
+static void afs_page_cache_read_complete(void *cookie_data, struct page *page, void *data);
+
 /*****************************************************************************/
 /*
  * initialise the fs caching module
@@ -128,9 +146,21 @@
 
 	printk(KERN_INFO "cachefstest: general fs caching v0.1 tester registering\n");
 
+	ret = -ENOMEM;
+	afs_data_page = alloc_pages(GFP_KERNEL,1);
+	if (!afs_data_page)
+		goto error;
+
+	afs_data_page2 = afs_data_page + 1;
+	afs_data_page->index  = 0;
+	afs_data_page2->index = 1;
+
+	memset(page_address(afs_data_page ),0xde,PAGE_SIZE);
+	memset(page_address(afs_data_page2),0xdf,PAGE_SIZE);
+
 	ret = cachefs_register_netfs(&cachetest,&cachetest_cell_index_def);
 	if (ret<0)
-		goto error;
+		goto error2;
 
 	printk("\n### Register cell object\n");
 	cachefs_acquire_cookie(cachetest.primary_index,
@@ -150,8 +180,42 @@
 			       &afs_root_dir,
 			       &afs_root_dir.cache);
 
+	printk("\n### Read page\n");
+	ret = cachefs_read_or_alloc_page(afs_root_dir.cache,
+					 afs_data_page,
+					 afs_page_cache_read_complete,
+					 (void*)7,
+					 GFP_KERNEL);
+
+	printk("\n### Read page 2\n");
+	ret = cachefs_read_or_alloc_page(afs_root_dir.cache,
+					 afs_data_page2,
+					 afs_page_cache_read_complete,
+					 (void*)9,
+					 GFP_KERNEL);
+
+	printk("\n### Write page\n");
+	ret = cachefs_write_page(afs_root_dir.cache,
+				 afs_data_page,
+				 afs_page_cache_write_complete,
+				 (void*)3,
+				 GFP_KERNEL);
+
+	printk("\nresult = %d\n",ret);
+
+	printk("\n### Write page2\n");
+	ret = cachefs_write_page(afs_root_dir.cache,
+				 afs_data_page2,
+				 afs_page_cache_write_complete,
+				 (void*)23,
+				 GFP_KERNEL);
+
+	printk("\nresult = %d\n",ret);
+
 	return 0;
 
+ error2:
+	__free_pages(afs_data_page,1);
  error:
 	printk(KERN_ERR "cachefstest: failed to register: %d\n",ret);
 	return ret;
@@ -165,6 +229,11 @@
 {
 	printk(KERN_INFO "cachefstest: general fs caching v0.1 tester unregistering.\n");
 
+	cachefs_uncache_page(afs_root_dir.cache,afs_data_page ,&afs_data_page_cookie );
+	cachefs_uncache_page(afs_root_dir.cache,afs_data_page2,&afs_data_page2_cookie);
+
+	__free_pages(afs_data_page,1);
+
 	cachefs_relinquish_cookie(afs_root_dir.cache,0);
 	cachefs_relinquish_cookie(afs_root_volume.cache,0);
 	cachefs_relinquish_cookie(afs_mycell.cache,0);
@@ -263,6 +332,49 @@
 	memcpy(entry,&fid,sizeof(fid));
 
 } /* end file_update() */
+
+/*****************************************************************************/
+/*
+ * 
+ */
+static void afs_page_cache_write_complete(void *cookie_data, struct page *page, void *data)
+{
+	kenter("%p,%p,%p",cookie_data,page,data);
+
+} /* end afs_page_cache_write_complete() */
+
+/*****************************************************************************/
+/*
+ * 
+ */
+static void afs_page_cache_read_complete(void *cookie_data, struct page *page, void *data)
+{
+	kenter("%p,%p,%p",cookie_data,page,data);
+
+} /* end afs_page_cache_read_complete() */
+
+/*****************************************************************************/
+/*
+ * get a page cookie for the specified page
+ */
+static int afs_get_page_cookie(struct page *page, struct cachefs_page **_page_cookie)
+{
+	kenter("%p,",page);
+
+	if (page == afs_data_page) {
+		*_page_cookie = &afs_data_page_cookie;
+		kleave(" = 0 [%p]",*_page_cookie);
+		return 0;
+	}
+	else if (page == afs_data_page2) {
+		*_page_cookie = &afs_data_page2_cookie;
+		kleave(" = 0 [%p]",*_page_cookie);
+		return 0;
+	}
+
+	kleave(" = -ENOMEM");
+	return -ENOMEM;
+} /* end afs_get_page_cookie() */
 
 /*****************************************************************************/
 /*

Index: cachefs-layout.h
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/cachefs-layout.h,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- cachefs-layout.h	17 Apr 2003 11:59:04 -0000	1.21
+++ cachefs-layout.h	23 May 2003 12:59:22 -0000	1.22
@@ -56,6 +56,7 @@
 	u_int32_t			ujnl_rsize;	/* update journal record size */
 	u_int32_t			ujnl_recperblk;	/* u-journal records per block */
 	cachefs_blockix_t		bix_ujournal;	/* start of update journal */
+	cachefs_blockix_t		bix_vjournal;	/* start of invalid block journal */
 	cachefs_blockix_t		bix_wbjournal;	/* start of writeback journal */
 	cachefs_blockix_t		bix_cache;	/* start of data cache */
 	cachefs_blockix_t		bix_unready;	/* start of initially unallocated blocks */
@@ -291,13 +292,36 @@
 	 * - auxentry	= 0: alloc TOS leaf, 1: alloc TOS node
 	 * - upblock	= block which will point to this one
 	 * - upentry	= entry in block pointing to this one
+	 * - auxmark	= v-journal entry number
 	 */
 	CACHEFS_ONDISC_UJNL_DATA_ALLOCING,
 
 	/* beginning write on page in cache */
 	CACHEFS_ONDISC_UJNL_DATA_WRITING,
 
-	/* indirect block being allocated */
+	/* data block being unallocated
+	 * - ino	= inode for which block is being allocated
+	 * - pgnum	= which page of inode being allocated
+	 * - block	= block being recycled
+	 * - auxblock	= (old) front recycling node
+	 * - auxentry	= index into auxblock[] of leaf filled (or UINT_MAX if new node)
+	 * - upblock	= block from which transferred
+	 * - upentry	= entry in upblock[]
+	 * - auxmark	= v-journal entry number
+	 */
+	CACHEFS_ONDISC_UJNL_DATA_UNALLOCING,
+
+	/* indirect block being allocated
+	 * - auxmark	= which level being allocated
+	 * - ino	= inode for which block is being allocated
+	 * - pgnum	= which page of inode being allocated
+	 * - size	= current file size
+	 * - block	= block being allocated
+	 * - auxblock	= (auxentry==1) block holding 2OS of alloc stack (0 if stack now empty)
+	 * - auxentry	= 0: alloc TOS leaf, 1: alloc TOS node
+	 * - upblock	= block which will point to this one
+	 * - upentry	= entry in block pointing to this one
+	 */
 	CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING,
 
 	/* indirection block being detached for recycling
@@ -376,17 +400,38 @@
 
 /*****************************************************************************/
 /*
+ * on-disc block validity journal
+ * - blocks noted here don't yet have valid data downloaded from the remote server
+ * - unused entries have ino==0
+ * - changed under the influence of the u-journal
+ */
+struct cachefs_ondisc_validity_journal
+{
+	u_int32_t			ino;		/* inode number */
+	u_int32_t			pgnum;		/* page within inode */
+};
+
+#define CACHEFS_ONDISC_VJNL_ENTPERPAGE	\
+	(PAGE_SIZE / sizeof(struct cachefs_ondisc_writeback_journal))
+
+#define CACHEFS_ONDISC_VJNL_SIZE	16 /* blocks */
+
+#define CACHEFS_ONDISC_VJNL_ENTS \
+	(CACHEFS_ONDISC_VJNL_ENTPERPAGE * CACHEFS_ONDISC_VJNL_SIZE)
+
+/*****************************************************************************/
+/*
  * on-disc writeback journal
  * - records pages that are pending being written back to the server
  * - journal slots are allocated in a round-robin fashion, going through slot 0 in every sector,
  *   then slot 1 in every sector, then slot 2... this means that two adjacent marks are made on
  *   separate sectors, and so the second doesn't have to wait for the first to be written to disc
- * - must be written between update journal marks
  */
 struct cachefs_ondisc_writeback_journal
 {
 	u_int32_t			ino;		/* in-cache inode number */
-	u_int32_t			index;		/* affected page */
+	u_int32_t			size;		/* size of changed region */
+	u_int64_t			fpos;		/* start file position */
 	u_int8_t			fsdata[8];	/* FS-specific data */
 };
 

Index: cachefs-int.h
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/cachefs-int.h,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -r1.30 -r1.31
--- cachefs-int.h	29 Apr 2003 15:24:06 -0000	1.30
+++ cachefs-int.h	23 May 2003 12:59:22 -0000	1.31
@@ -29,6 +29,7 @@
 struct cachefs_block;
 struct cachefs_inode;
 struct cachefs_search_result;
+struct cachefs_transaction;
 
 extern struct address_space_operations cachefs_addrspace_operations;
 extern struct address_space_operations cachefs_vio_addrspace_operations;
@@ -50,7 +51,8 @@
 
 extern int cachefs_io_dummy_filler(void *data, struct page *page);
 
-extern int cachefs_get_block(struct inode *inode, struct page *page, int create);
+extern int cachefs_get_block(struct inode *inode, struct page *page, struct cachefs_page *pageio,
+			     int create);
 
 struct cachefs_reclaimable {
 	unsigned	ino;
@@ -155,6 +157,19 @@
 
 	struct list_head		jnld_link;	/* journalling daemon list */
 
+	/* validity journal tracking */
+	unsigned long			*vjnl_map;	/* bitmap of free entries (1 page) */
+	unsigned			vjnl_count;	/* number of free entries */
+	spinlock_t			vjnl_lock;	/* allocation lock */
+	wait_queue_head_t		vjnl_alloc_wq;	/* allocation queue */
+	struct list_head		vjnl_unallocq;	/* blocks requiring unallocation */
+
+	/* writeback journal tracking */
+	unsigned long			*wbj_map;	/* bitmap of free entries (1 page) */
+	unsigned			wbj_count;	/* number of free entries */
+	spinlock_t			wbj_lock;	/* allocation lock */
+	wait_queue_head_t		wbj_alloc_wq;	/* allocation queue */
+
 	/* cache management daemon for this fs */
 	task_t				*dmn_task;	/* cache daemon task */
 	struct completion		dmn_alive;	/* completion of initialisation */
@@ -182,6 +197,7 @@
 extern void cachefs_recycle_unready_blocks(struct cachefs_super *super);
 extern void cachefs_recycle_transfer_stack(struct cachefs_super *super);
 extern void cachefs_recycle_reclaim(struct cachefs_super *super);
+extern void cachefs_recycle_unallocate_data_block(struct cachefs_super *super);
 
 /*****************************************************************************/
 /*
@@ -257,14 +273,16 @@
 
 static inline void cachefs_iput(struct cachefs_inode *inode)
 {
-	iput(&inode->vfs_inode);
+	if (inode)
+		iput(&inode->vfs_inode);
 }
 
 extern int cachefs_get_page(struct cachefs_inode *inode, unsigned index, struct page **_page);
 
 static inline void cachefs_put_page(struct page *page)
 {
-	page_cache_release(page);
+	if (page)
+		page_cache_release(page);
 }
 
 extern void cachefs_withdraw_inode(struct cachefs_inode *inode);
@@ -299,6 +317,8 @@
 #define CACHEFS_BLOCK_UJOURNAL	5	/* [bit] block holds update journal entries */
 #define CACHEFS_BLOCK_CRITICAL	6	/* [bit] block holds critical data that mustn't be
 					 *       zapped until sync'd */
+#define CACHEFS_BLOCK_WITHDRAWN	7	/* [bit] backing cache withdrawn from service */
+#define CACHEFS_BLOCK_NETFSBUSY	8	/* [bit] netfs is accessing the block */
 
 #define _CACHEFS_BLOCK_ALLOC		(1 << CACHEFS_BLOCK_ALLOC)
 #define _CACHEFS_BLOCK_COW		(1 << CACHEFS_BLOCK_COW)
@@ -309,8 +329,14 @@
 	struct page			*page;		/* current data for this block */
 	struct page			*writeback;	/* source of writeback for this block */
 	struct cachefs_page		*ref;		/* netfs's ref to this page */
+	rwlock_t			ref_lock;	/* lock governing ref pointer */
+	struct cachefs_vj_entry		*vjentry;	/* invalid block record */
 };
 
+extern kmem_cache_t *cachefs_block_jar;
+
+extern void cachefs_block_init_once(void *_block, kmem_cache_t *cachep, unsigned long flags);
+
 extern int cachefs_block_insert(struct cachefs_super *super,
 				cachefs_blockix_t bix,
 				struct cachefs_block **_block);
@@ -321,11 +347,13 @@
 
 extern int cachefs_block_set(struct cachefs_super *super,
 			     struct cachefs_block *block,
-			     struct page *page);
+			     struct page *page,
+			     struct cachefs_page *pageio);
 
 extern int cachefs_block_set2(struct cachefs_super *super,
 			      cachefs_blockix_t bix,
 			      struct page *page,
+			      struct cachefs_page *pageio,
 			      struct cachefs_block **_block);
 
 extern int cachefs_block_read(struct cachefs_super *super,
@@ -347,21 +375,23 @@
 	return block;
 }
 
-extern void __cachefs_block_put(struct cachefs_super *super, struct cachefs_block *block);
+extern void __cachefs_block_put(struct cachefs_block *block);
 
-static inline void cachefs_block_put(struct cachefs_super *super, struct cachefs_block *block)
+static inline void cachefs_block_put(struct cachefs_block *block)
 {
-	int usage = atomic_read(&block->usage);
-	/*kenter(",{bix=%u u=%d",block->bix,atomic_read(&block->usage));*/
-
-	if ((usage&0xffffff00)==0x6b6b6b00) {
-		printk("\ncachefs_block_put(%p,%p{u=%d})\n",super,block,usage);
-		BUG();
+	if (block) {
+		int usage = atomic_read(&block->usage);
+		/*kenter(",{bix=%u u=%d",block->bix,atomic_read(&block->usage));*/
+
+		if ((usage&0xffffff00)==0x6b6b6b00) {
+			printk("\ncachefs_block_put(%p{u=%d})\n",block,usage);
+			BUG();
+		}
+
+		BUG_ON(usage<=0);
+		if (atomic_dec_and_test(&block->usage))
+			__cachefs_block_put(block);
 	}
-
-	BUG_ON(usage<=0);
-	if (atomic_dec_and_test(&block->usage))
-		__cachefs_block_put(super,block);
 }
 
 static inline struct cachefs_block *__cachefs_get_page_block(struct page *page)
@@ -375,18 +405,36 @@
 	cachefs_block_modify(super,__cachefs_get_page_block(*page),page);
 }
 
+extern void cachefs_block_withdraw(struct cachefs_super *super);
+
+/*****************************************************************************/
+/*
+ * record of as-yet invalid data block for which a v-journal entry exists
+ */
+struct cachefs_vj_entry
+{
+	struct list_head	link;
+	cachefs_blockix_t	bix;
+	unsigned		ino;		/* inode to which applies */
+	unsigned		pgnum;		/* page in inode */
+	unsigned		vslot;		/* v-journal slot in which mark stored */
+	struct page		*vpage;		/* page holding vblock */
+	struct cachefs_block	*vblock;	/* v-journal block in which mark stored */
+	unsigned		ventry;		/* offset in vblock at which mark stored */
+	unsigned		upblock;	/* block in which pointer stored */
+	unsigned		upentry;	/* offset in upblock at which pointer stored */
+};
+
+extern int cachefs_vj_alloc(struct cachefs_transaction *trans, struct cachefs_inode *inode);
+extern void cachefs_vj_release(struct cachefs_super *super, struct cachefs_vj_entry *vjentry);
+extern void cachefs_vj_cancel(struct cachefs_vj_entry *vjentry);
+extern void cachefs_vj_write_complete(struct cachefs_vj_entry *vjentry);
+
+
 /*****************************************************************************/
 /*
  * transaction record and tracking structure
- * - an operation consists of a number of stages:
- *   (1) write a mark to the journal
- *   (2) modify memory-resident data
- *   (3) write modified data to disc
- *   (4) write an ACK to the journal
- * - each operation can touch any number of disc blocks in stages (2)/(3)
- * - stages (2) and (3) can proceed when EITHER stage (1) is complete OR there are no outstanding
- *   writes remaining on the block being modified
- *   - ALSO stage (1) must be barriered at the BIO level
+ * - these record the modification of metadata (and not, generally, ordinary data)
  */
 enum cachefs_trans_phase {
 	CACHEFS_TRANS_PREPARING,	/* mark is being prepared */
@@ -399,8 +447,6 @@
 {
 	struct cachefs_block		*block;
 	struct page			*held_page;	/* page on hold till writeback complete */
-	unsigned short			start;		/* start sector in block */
-	unsigned short			stop;		/* stop sector in block */
 };
 
 #define CACHEFS_EFFECTS_PER_TRANS 4
@@ -418,6 +464,7 @@
 
 	struct cachefs_block		*jblock;	/* block holding ondisc u-journal entry */
 	struct page			*jpage;		/* page holding u-journal entry */
+	struct cachefs_vj_entry		*vjentry;	/* associated v-journal entry */
 	struct cachefs_super		*super;
 	struct list_head		sblink;		/* next transaction in superblock's list */
 
@@ -431,7 +478,12 @@
 extern struct cachefs_transaction *cachefs_trans_alloc(struct cachefs_super *super,
 						       unsigned long gfp);
 
-extern void cachefs_trans_put(struct cachefs_transaction *trans);
+extern void __cachefs_trans_put(struct cachefs_transaction *trans);
+static inline void cachefs_trans_put(struct cachefs_transaction *trans)
+{
+	if (trans)
+		__cachefs_trans_put(trans);
+}
 
 extern void cachefs_trans_affects_block(struct cachefs_transaction *trans,
 					struct cachefs_block *target,
@@ -439,12 +491,11 @@
 					unsigned size);
 
 static inline void cachefs_trans_affects_page(struct cachefs_transaction *trans,
-					      struct page *page,
+					      struct cachefs_page *pageio,
 					      unsigned offset,
 					      unsigned size)
 {
-	cachefs_trans_affects_block(trans,__cachefs_get_page_private(page)->mapped_block,
-				    offset,size);
+	cachefs_trans_affects_block(trans,pageio->mapped_block,offset,size);
 }
 
 static inline void cachefs_trans_affects_inode(struct cachefs_transaction *trans,
@@ -453,7 +504,7 @@
 	struct cachefs_super *super = inode->vfs_inode.i_sb->s_fs_info;
 
 	cachefs_trans_affects_page(trans,
-				   inode->storage,
+				   __cachefs_page_get_private(inode->storage),
 				   inode->storage_offset,
 				   super->layout->storage_size);
 }
@@ -461,7 +512,8 @@
 static inline void cachefs_trans_affects_super(struct cachefs_transaction *trans)
 {
 	struct cachefs_super *super = trans->super;
-	cachefs_trans_affects_page(trans,virt_to_page(super->layout),0,super->sb->s_blocksize);
+	cachefs_trans_affects_page(trans,__cachefs_page_get_private(virt_to_page(super->layout)),
+				   0,super->sb->s_blocksize);
 }
 
 extern int  cachefs_trans_mark(struct cachefs_transaction *trans);
@@ -496,12 +548,13 @@
 		  );							\
 } while(0)
 
-#define dbgpgfree(PAGE)							\
-do {									\
-	_dbprintk("PGFREE %s:%d: %p {%lx,%lu}\n",			\
-		  __FILE__,__LINE__,					\
-		  (PAGE),(PAGE)->mapping->host->i_ino,(PAGE)->index	\
-		  );							\
+#define dbgpgfree(PAGE)								\
+do {										\
+	if ((PAGE))								\
+		_dbprintk("PGFREE %s:%d: %p {%lx,%lu}\n",			\
+			  __FILE__,__LINE__,					\
+			  (PAGE),(PAGE)->mapping->host->i_ino,(PAGE)->index	\
+			  );							\
 } while(0)
 
 #ifdef __KDEBUG

Index: block.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/block.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- block.c	29 Apr 2003 15:24:06 -0000	1.4
+++ block.c	23 May 2003 12:59:22 -0000	1.5
@@ -17,13 +17,30 @@
 #include <linux/bio.h>
 #include "cachefs-int.h"
 
+kmem_cache_t *cachefs_block_jar;
+
+void cachefs_block_init_once(void *_block, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct cachefs_block *block = _block;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(block,0,sizeof(*block));
+
+		rwlock_init(&block->ref_lock);
+		init_waitqueue_head(&block->writewq);
+		INIT_LIST_HEAD(&block->batch_link);
+	}
+}
+
 static int cachefs_block_dummy_filler(void *data, struct page *page)
 {
 	struct cachefs_page *pageio;
 	int ret;
 
+	_enter("%p,{%lu}",data,page->index);
+
 	/* we need somewhere to note journal ACKs that need to be made */
-	ret = cachefs_get_page_private(page,&pageio,GFP_KERNEL);
+	ret = cachefs_page_get_private(page,&pageio,GFP_KERNEL);
 	if (ret<0)
 		return ret;
 
@@ -35,7 +52,6 @@
 	flush_dcache_page(page);
 
 	SetPageUptodate(page);
-	SetPageMappedToDisk(page);
 
 	unlock_page(page);
 	return 0;
@@ -47,7 +63,8 @@
  */
 int cachefs_block_set(struct cachefs_super *super,
 		      struct cachefs_block *block,
-		      struct page *page)
+		      struct page *page,
+		      struct cachefs_page *pageio)
 {
 	DECLARE_WAITQUEUE(myself,current);
 
@@ -56,7 +73,7 @@
 	_enter(",%u,",block->bix);
 
 	/* don't do anything if already associated as we want */
-	block2 = __cachefs_get_page_block(page);
+	block2 = pageio->mapped_block;
 	if (block2) {
 		if (block2==block) {
 			if (block->page==page) {
@@ -64,8 +81,7 @@
 				return 0;
 			}
 
-			xchg(&block->page,page);
-			SetPageMappedToDisk(page);
+			block->page = page;
 			_leave(" = 0 [assoc xchg]");
 			return 0;
 		}
@@ -94,11 +110,10 @@
 	}
 
 	/* make the association */
-	__cachefs_get_page_private(page)->mapped_block = cachefs_block_get(block);
+	pageio->mapped_block = cachefs_block_get(block);
 
 	clear_bit(CACHEFS_BLOCK_COW,&block->flags);
-	xchg(&block->page,page);
-	SetPageMappedToDisk(page);
+	block->page = page;
 
 	clear_bit(CACHEFS_BLOCK_ALLOC,&block->flags);
 	wake_up_all(&block->writewq);
@@ -118,6 +133,7 @@
 int cachefs_block_set2(struct cachefs_super *super,
 		       cachefs_blockix_t bix,
 		       struct page *page,
+		       struct cachefs_page *pageio,
 		       struct cachefs_block **_block)
 {
 	struct cachefs_block *block;
@@ -132,7 +148,7 @@
 	if (ret<0)
 		goto error;
 
-	ret = cachefs_block_set(super,block,page);
+	ret = cachefs_block_set(super,block,page,pageio);
 	if (ret<0)
 		goto error2;
 
@@ -142,7 +158,7 @@
 	}
 
  error2:
-	cachefs_block_put(super,block);
+	cachefs_block_put(block);
  error:
 	_leave(" = %d",ret);
 	return ret;
@@ -169,8 +185,8 @@
 
 	_enter(",%lx,%u,%d,,",inode?inode->vfs_inode.i_ino:CACHEFS_INO_MISC,bix,wipe);
 
-	if (_block) *_block = NULL;
-	if (_page)  *_page = NULL;
+	if (_block)	*_block  = NULL;
+	if (_page)	*_page   = NULL;
 
 	/* get the block definition */
 	ret = cachefs_block_insert(super,bix,&block);
@@ -232,7 +248,7 @@
 		page = read_cache_page(mapping,bix,filler,block);
 
 		if (IS_ERR(page)) {
-			cachefs_block_put(super,block);
+			cachefs_block_put(block);
 			_leave(" = %ld",PTR_ERR(page));
 			return PTR_ERR(page);
 		}
@@ -251,7 +267,7 @@
 		*_block = block;
 	}
 	else {
-		cachefs_block_put(super,block);
+		cachefs_block_put(block);
 		block = NULL;
 	}
 
@@ -270,7 +286,7 @@
 	clear_bit(CACHEFS_BLOCK_ALLOC,&block->flags);
 	wake_up_all(&block->writewq);
  intr:
-	cachefs_block_put(super,block);
+	cachefs_block_put(block);
 	_leave(" = -EINTR");
 	return -EINTR;
 } /* end cachefs_block_read() */
@@ -314,7 +330,7 @@
 
 		newpage = page_cache_alloc_cold(mapping);
 		if (!newpage)
-			goto nomem;			
+			goto nomem;
 
 		if (cachefs_get_page_private(newpage,&newpageio,mapping->gfp_mask)<0)
 			goto nomem_page;
@@ -407,16 +423,17 @@
 	*_block = NULL;
 
 	/* allocate a block record just in case */
-	newblock = kmalloc(sizeof(*newblock),GFP_KERNEL);
+	newblock = kmem_cache_alloc(cachefs_block_jar,SLAB_KERNEL);
 	if (!newblock)
 		return -ENOMEM;
 
-	memset(newblock,0,sizeof(*newblock));
 	atomic_set(&newblock->usage,1);
-	init_waitqueue_head(&newblock->writewq);
-	INIT_LIST_HEAD(&newblock->batch_link);
-	newblock->bix	= bix;
-	newblock->super	= super;
+	newblock->flags		= 0;
+	newblock->bix		= bix;
+	newblock->super		= super;
+	newblock->page		= NULL;
+	newblock->writeback	= NULL;
+	newblock->ref		= NULL;
 
 	parent = NULL;
 	block = NULL;
@@ -451,7 +468,7 @@
 	write_unlock_irqrestore(&super->blk_tree_lock,flags);
 
 	dbgfree(newblock);
-	kfree(newblock);
+	kmem_cache_free(cachefs_block_jar,newblock);
 
 	*_block = block;
 	_leave(" = 0 [found %p{u=%d}]",block,atomic_read(&block->usage));
@@ -505,8 +522,9 @@
 /*
  * dispose of a block in the superblock's lookup tree
  */
-void __cachefs_block_put(struct cachefs_super *super, struct cachefs_block *block)
+void __cachefs_block_put(struct cachefs_block *block)
 {
+	struct cachefs_super *super = block->super;
 	unsigned long flags;
 
 	_enter(",{u=%d bix=%d}",atomic_read(&block->usage),block->bix);
@@ -523,8 +541,95 @@
 	if (block) {
 		atomic_dec(&super->cnt_blk_tree);
 		dbgfree(block);
-		kfree(block);
+		kmem_cache_free(cachefs_block_jar,block);
 	}
 
 	_leave("");
 } /* end __cachefs_block_put() */
+
+/*****************************************************************************/
+/*
+ * withdraw from active service all the blocks residing on a device
+ */
+void cachefs_block_withdraw(struct cachefs_super *super)
+{
+	struct cachefs_block *block, *xblock;
+	struct cachefs_page *pageio;
+	struct rb_node *node;
+	unsigned long flags;
+
+	DECLARE_WAITQUEUE(myself,current);
+
+	kenter("");
+
+	/* first thing to do is mark all blocks withdrawn
+	 * - this prevents the netfs from getting underfoot
+	 */
+	read_lock_irqsave(&super->blk_tree_lock,flags);
+
+	for (node=rb_first(&super->blk_tree); node; node=rb_next(node)) {
+		block = rb_entry(node,struct cachefs_block,lookup_node);
+		set_bit(CACHEFS_BLOCK_WITHDRAWN,&block->flags);
+	}
+
+	read_unlock_irqrestore(&super->blk_tree_lock,flags);
+
+	/* now withdraw each block that's already in use by a netfs */
+	for (;;) {
+		block = NULL;
+		write_lock_irqsave(&super->blk_tree_lock,flags);
+
+		for (node=rb_first(&super->blk_tree); node; node=rb_next(node)) {
+			block = rb_entry(node,struct cachefs_block,lookup_node);
+			if (block->ref) {
+				cachefs_block_get(block);
+				break;
+			}
+		}
+
+		write_unlock_irqrestore(&super->blk_tree_lock,flags);
+
+		if (!node)
+			break;
+
+		kdebug("withdraw block %u",block->bix);
+
+		/* disconnect block from netfs page cookie */
+		xblock = NULL;
+		write_lock(&block->ref_lock);
+		pageio = block->ref;
+		if (pageio) {
+			write_lock(&pageio->lock);
+			xblock = pageio->mapped_block;
+			pageio->mapped_block = NULL;
+			block->ref = NULL;
+			write_unlock(&pageio->lock);
+		}
+		write_unlock(&block->ref_lock);
+
+		if (xblock)
+			cachefs_block_put(xblock);
+
+		/* wait for the netfs to finish with the block */
+		if (test_bit(CACHEFS_BLOCK_NETFSBUSY,&block->flags)) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			add_wait_queue(&block->writewq,&myself);
+
+			while (test_bit(CACHEFS_BLOCK_NETFSBUSY,&block->flags)) {
+				schedule();
+				set_current_state(TASK_UNINTERRUPTIBLE);
+			}
+
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&block->writewq,&myself);
+		}
+
+		/* a block that's not yet valid must be cancelled */
+		if (block->vjentry)
+			cachefs_vj_cancel(block->vjentry);
+
+		cachefs_block_put(block);
+	}
+
+	kleave("");
+} /* end cachefs_block_withdraw() */

Index: aops.c
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/aops.c,v
retrieving revision 1.28
retrieving revision 1.29
diff -u -r1.28 -r1.29
--- aops.c	29 Apr 2003 15:24:06 -0000	1.28
+++ aops.c	23 May 2003 12:59:22 -0000	1.29
@@ -29,6 +29,7 @@
 
 struct cachefs_io_block_path {
 	struct page			*page;		/* page holding this block */
+	struct cachefs_page		*pageio;	/* page => block mapping */
 	cachefs_blockix_t		bix;		/* block number for this level */
 	unsigned			offset;		/* offset into parent pointer block */
 
@@ -38,7 +39,8 @@
 #define CACHEFS_BLOCK_WRITTEN		0x00000004
 #define CACHEFS_BLOCK_INIT_INDIRECT	0x00000008
 #define CACHEFS_BLOCK_INIT_INDEX	0x00000010
-#define CACHEFS_BLOCK_NEED_SYNC		0x00000020
+#define CACHEFS_BLOCK_INIT_DATA		0x00000020
+#define CACHEFS_BLOCK_NEED_SYNC		0x00000040
 
 	/* ujournal marks for allocation journalling entries */
 	enum cachefs_ondisc_ujnl_mark	mktype : 8;
@@ -69,7 +71,7 @@
 	.invalidatepage		= cachefs_invalidatepage,
 };
 
-int cachefs_get_page_private(struct page *page, struct cachefs_page **_pageio, unsigned gfp_flags)
+int cachefs_page_get_private(struct page *page, struct cachefs_page **_pageio, unsigned gfp_flags)
 {
 	struct cachefs_page *pageio = (struct cachefs_page*) page->private;
 
@@ -285,19 +287,19 @@
 
 	_enter("");
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_KERNEL);
+	ret = cachefs_page_get_private(page,&pageio,GFP_KERNEL);
 	if (ret<0)
 		goto error;
 
 	last_block = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (page->index < last_block) {
 		if (inode->i_ino==CACHEFS_INO_STORAGE && page->index==0) {
-			ret = cachefs_block_set2(inode->i_sb->s_fs_info,1,page,NULL);
+			ret = cachefs_block_set2(inode->i_sb->s_fs_info,1,page,pageio,NULL);
 			if (ret<0)
 				goto error;
 		}
 		else {
-			ret = cachefs_get_block(inode,page,0);
+			ret = cachefs_get_block(inode,page,pageio,0);
 			if (ret<0)
 				goto error;
 		}
@@ -432,7 +434,7 @@
 
 	_enter(",{%lu},%llu,",page->index,*last_block_in_bio);
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_KERNEL);
+	ret = cachefs_page_get_private(page,&pageio,GFP_KERNEL);
 	if (ret<0)
 		goto error;
 
@@ -441,7 +443,7 @@
 		/* map the page to a block on the blockdev */
 		BUG_ON(!PageUptodate(page));
 
-		ret = cachefs_get_block(inode,page,1);
+		ret = cachefs_get_block(inode,page,pageio,1);
 		if (ret<0) {
 			*_ret = ret;
 			goto out;
@@ -558,7 +560,7 @@
 	}
 
 	pagevec_init(&pvec, 0);
-	write_lock(&mapping->page_lock);
+	spin_lock(&mapping->page_lock);
 
 	list_splice_init(&mapping->dirty_pages,&mapping->io_pages);
 
@@ -581,7 +583,7 @@
 		list_add(&page->list,&mapping->locked_pages);
 
 		page_cache_get(page);
-		write_unlock(&mapping->page_lock);
+		spin_unlock(&mapping->page_lock);
 
 		/*
 		 * At this point we hold neither mapping->page_lock nor
@@ -611,11 +613,11 @@
 		}
 
 		page_cache_release(page);
-		write_lock(&mapping->page_lock);
+		spin_lock(&mapping->page_lock);
 	}
 
 	/* leave any remaining dirty pages on ->io_pages */
-	write_unlock(&mapping->page_lock);
+	spin_unlock(&mapping->page_lock);
 	if (bio)
 		cachefs_io_bio_submit(WRITE,&bio);
 
@@ -641,7 +643,7 @@
 	BUG_ON(!PageUptodate(page));
 	BUG_ON(PageWriteback(page));
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_NOFS);
+	ret = cachefs_page_get_private(page,&pageio,GFP_NOFS);
 	if (ret<0)
 		goto error;
 
@@ -664,7 +666,7 @@
 	}
 
 	if (!pageio->mapped_block) {
-		ret = cachefs_get_block(inode,page,1);
+		ret = cachefs_get_block(inode,page,pageio,1);
 		if (ret<0)
 			goto error;
 
@@ -708,13 +710,13 @@
 
 	_enter(",{%lu},%u,%u",page->index,from,to);
 
-	ret = cachefs_get_page_private(page,&pageio,GFP_NOFS);
+	ret = cachefs_page_get_private(page,&pageio,GFP_NOFS);
 	if (ret<0)
 		goto error;
 
 	/* map the page to a disc block and prepare a new page for writing */
 	if (!pageio->mapped_block) {
-		ret = cachefs_get_block(inode,page,1);
+		ret = cachefs_get_block(inode,page,pageio,1);
 		if (ret<0)
 			goto error;
 
@@ -817,10 +819,9 @@
 
 	pageio = (struct cachefs_page*) page->private;
 	block = xchg(&pageio->mapped_block,NULL);
-	ClearPageMappedToDisk(page);
 	pageio->flags = 0;
 
-	if (block) cachefs_block_put(page->mapping->host->i_sb->s_fs_info,block);
+	if (block) cachefs_block_put(block);
 
 	/*
 	 * We release buffers only if the entire page is being invalidated.
@@ -851,12 +852,11 @@
 		pageio = (struct cachefs_page*) page->private;
 		page->private = 0;
 		ClearPagePrivate(page);
-		ClearPageMappedToDisk(page);
 
 		if (pageio) {
 			block = xchg(&pageio->mapped_block,NULL);
 			if (block)
-				cachefs_block_put(page->mapping->host->i_sb->s_fs_info,block);
+				cachefs_block_put(block);
 			dbgfree(pageio);
 			kfree(pageio);
 		}
@@ -887,7 +887,7 @@
 	int ret;
 	u8 *data;
 
-	_enter(",,{pg=%p}",step->page);
+	kenter(",,{pg=%p}",step->page);
 
 	jentry = step->transaction->jentry;
 
@@ -896,8 +896,11 @@
 	/* do all the allocation first */
 	ret = -ENOMEM;
 
-	cachefs_trans_affects_page(step->transaction,
-				   step[1].page,step->offset,sizeof(cachefs_blockix_t));
+	BUG_ON(!step[1].pageio);
+	BUG_ON(!step[1].pageio->mapped_block);
+
+	cachefs_trans_affects_page(step->transaction,step[1].pageio,
+				   step->offset,sizeof(cachefs_blockix_t));
 
 	/* index content data blocks need to be initialised on disc */
 	if (step->flags & CACHEFS_BLOCK_INIT_INDEX) {
@@ -909,6 +912,21 @@
 		jentry->count = inode->index_epp;
 	}
 
+	/* freshly allocated data blocks must be recorded in the v-journal */
+	if (step->flags & CACHEFS_BLOCK_INIT_DATA) {
+		kdebug("init data");
+
+		ret = cachefs_vj_alloc(step->transaction,inode);
+		if (ret<0)
+			goto error_trans;
+
+		step->transaction->vjentry->pgnum	= step->page->index;
+		step->transaction->vjentry->upblock	= step[1].bix;
+		step->transaction->vjentry->upentry	= step->offset;
+
+		jentry->auxmark = step->transaction->vjentry->vslot;
+	}
+
 	/* wait for a node to become available in the allocation stack */
 	down(&super->alloc_sem);
 
@@ -930,7 +948,7 @@
 			goto error_sem;
 	}
 
-	_debug("use leaf %u/%lu",super->alloc_leaf,CACHEFS_ONDISC_LEAVES_PER_FREE_NODE);
+	kdebug("use leaf %u/%lu",super->alloc_leaf,CACHEFS_ONDISC_LEAVES_PER_FREE_NODE);
 
 	BUG_ON(super->alloc_leaf>CACHEFS_ONDISC_LEAVES_PER_FREE_NODE);
 
@@ -951,7 +969,7 @@
 		jentry->auxentry = 1;
 
 		if (step->page)
-			cachefs_block_set(super,block,step->page);
+			cachefs_block_set(super,block,step->page,step->pageio);
 	}
 	else {
 		/* take the next dependent page */
@@ -968,20 +986,22 @@
 			ret = cachefs_block_read(super,NULL,jentry->block,1,&block,&step->page);
 			if (ret<0)
 				goto error_block;
+			step->pageio = __cachefs_page_get_private(step->page);
 		}
 		else {
-			ret = cachefs_block_set2(super,jentry->block,step->page,&block);
+			ret = cachefs_block_set2(super,jentry->block,step->page,step->pageio,
+						 &block);
 			if (ret<0)
 				goto error_block;
 		}
 	}
 
-	if (step->flags & (CACHEFS_BLOCK_INIT_INDIRECT | CACHEFS_BLOCK_INIT_INDIRECT))
+	if (step->flags & (CACHEFS_BLOCK_INIT_INDEX | CACHEFS_BLOCK_INIT_INDIRECT))
 		cachefs_trans_affects_block(step->transaction,block,0,PAGE_SIZE);
 
 	jentry->size = inode->vfs_inode.i_size;
 
-	_debug("selected block %u (next {%u}+%hu)",
+	kdebug("selected block %u (next {%u}+%hu)",
 	       jentry->block,jentry->auxblock,jentry->auxentry);
 
 	BUG_ON(jentry->block > super->layout->bix_end);
@@ -991,7 +1011,7 @@
 	    super->alloc_leaf>=CACHEFS_ONDISC_LEAVES_PER_FREE_NODE-30 &&
 	    !super->alloc_next
 	    ) {
-		_debug("prepare 2OS %u",alloc2os);
+		kdebug("prepare 2OS %u",alloc2os);
 
 		ret = cachefs_block_read(super,NULL,alloc2os,0,
 					 &super->alloc_nxblock,
@@ -1006,6 +1026,11 @@
 	if (cachefs_trans_mark(step->transaction)<0)
 		goto error_block;
 
+	if (step->flags & CACHEFS_BLOCK_INIT_DATA) {
+		block->vjentry = step->transaction->vjentry;
+		block->vjentry->bix = block->bix;
+	}
+
 	/* indirection blocks need to be cleared before use */
 	if (step->flags & CACHEFS_BLOCK_INIT_INDIRECT) {
 		cachefs_block_modify(super,block,&step->page);
@@ -1066,21 +1091,22 @@
 		cachefs_trans_sync(super,0);
 	}
 
-	cachefs_block_put(super,block);
+	cachefs_block_put(block);
 	block = NULL;
 
-	_leave(" = 0 [block %u]",step->bix);
+	kleave(" = 0 [block %u]",step->bix);
 	return 0;
 
  error_block:
-	cachefs_block_put(super,block);
+	cachefs_block_put(block);
 	block = NULL;
  error_sem:
 	up(&super->alloc_sem);
+ error_trans:
 	cachefs_trans_put(step->transaction);
 	step->transaction = NULL;
 	unlock_page(step[1].page);
-	_leave(" = %d",ret);
+	kleave(" = %d",ret);
 	return ret;
 } /* end cachefs_get_block_alloc() */
 
@@ -1099,55 +1125,46 @@
  * - if the inode forms part of an index, then the any blocks belong to that index and must be
  *   initialised as part of the final journalling mark
  */
-int cachefs_get_block(struct inode *vfs_inode, struct page *page, int create)
+int cachefs_get_block(struct inode *vfs_inode, struct page *page, struct cachefs_page *pageio,
+		      int create)
 {
 	struct cachefs_io_block_path path[4];
 	struct cachefs_inode *inode = CACHEFS_FS_I(vfs_inode);
 	struct cachefs_super *super = inode->vfs_inode.i_sb->s_fs_info;
-	struct cachefs_page *pageio = __cachefs_get_page_private(page);
 	const size_t ptrperblk = PAGE_SIZE / sizeof(cachefs_blockix_t);
 	sector_t iblock;
 	size_t ptrqty, notboundary = 1;
 	int pix, ret;
 
-#if 0
-	if (inode->vfs_inode.i_ino == CACHEFS_INO_WIBBLE) {
-		ret = cachefs_block_insert(inode->vfs_inode.i_sb->s_fs_info,page->index+1000,
-					   &pageio->mapped_block);
-		if (ret==0)
-			SetPageMappedToDisk(page);
-		return ret;
-	}
-
-#endif
-
-	_enter("%lu,{%p}%lu,%d",inode->vfs_inode.i_ino,page,page->index,create);
+	kenter("%lu,{%p}%lu,,%d",inode->vfs_inode.i_ino,page,page->index,create);
 
 	BUG_ON(pageio->mapped_block);
 
 	if (page->index/ptrperblk >= ptrperblk) {
-		_leave(" = -EIO [range]");
+		kleave(" = -EIO [range]");
 		return -EIO;
 	}
 
 	memset(path,0,sizeof(path));
-	path[2].mktype = CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING;
-	path[1].mktype = CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING;
-	path[0].mktype = CACHEFS_ONDISC_UJNL_DATA_ALLOCING;
+	path[2].mktype	= CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING;
+	path[1].mktype	= CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING;
+	path[0].mktype	= CACHEFS_ONDISC_UJNL_DATA_ALLOCING;
+	path[0].flags	= CACHEFS_BLOCK_INIT_DATA;
 
 	if (inode->index_esize) {
 		path[0].mktype	= CACHEFS_ONDISC_UJNL_INDEX_EXTENDING;
 		path[0].flags	= CACHEFS_BLOCK_INIT_INDEX;
 	}
 
-	path[0].page = page;
+	path[0].page	= page;
+	path[0].pageio	= pageio;
 
 	/* is it inside direct range? */
 	iblock = page->index;
 	ptrqty = super->sb->s_blocksize - sizeof(struct cachefs_ondisc_storage);
 	ptrqty /= sizeof(cachefs_blockix_t);
 	if (iblock < ptrqty) {
-		_debug("direct (%llu/%u)",iblock,ptrqty);
+		kdebug("direct (%llu/%u)",iblock,ptrqty);
 		notboundary = ptrqty - iblock + 1;
 
 		path[0].offset	= iblock * sizeof(cachefs_blockix_t);
@@ -1162,7 +1179,7 @@
 	/* is it inside single-indirect range? */
 	ptrqty = ptrperblk;
 	if (iblock < ptrqty) {
-		_debug("indirect (%llu/%u)",iblock,ptrqty);
+		kdebug("indirect (%llu/%u)",iblock,ptrqty);
 		notboundary = (iblock+1) & (ptrperblk-1);
 
 		path[0].offset	= iblock * sizeof(cachefs_blockix_t);
@@ -1179,7 +1196,7 @@
 	/* is it inside double-indirect range? */
 	ptrqty *= ptrqty;
 	if (iblock < ptrqty) {
-		_debug("double indirect (%llu/%u)",iblock,ptrqty);
+		kdebug("double indirect (%llu/%u)",iblock,ptrqty);
 		notboundary = (iblock+1) & (ptrperblk-1);
 
 		path[0].offset	= sector_div(iblock,PAGE_SIZE/sizeof(cachefs_blockix_t));
@@ -1206,13 +1223,15 @@
 	page_cache_get(path[pix+1].page);
 
 	path[pix].offset += inode->storage_offset;
-	path[pix+1].bix = __cachefs_get_page_block(inode->storage)->bix;
+
+	path[pix+1].pageio	= __cachefs_page_get_private(inode->storage);
+	path[pix+1].bix		= path[pix+1].pageio->mapped_block->bix;
 
 	ret = 0;
 	for (; pix>=0; pix--) {
 		struct cachefs_io_block_path *step = &path[pix];
 
-		_debug("step level %u { ptr={%lu}+%u / bix=%u }",
+		kdebug("step level %u { ptr={%lu}+%u / bix=%u }",
 		       pix,step[1].page->index,step->offset,step[1].bix);
 
 		/* get the block number for this level */
@@ -1227,12 +1246,12 @@
 			struct cachefs_ondisc_update_journal *jentry;
 
 			if (!create) {
-				_debug("path incomplete at level %d",pix);
+				kdebug("path incomplete at level %d",pix);
 				ret = -ENODATA;
 				break;
 			}
 
-			_debug("need to allocate level %d block",pix);
+			kdebug("need to allocate level %d block",pix);
 
 			step->transaction = cachefs_trans_alloc(inode->vfs_inode.i_sb->s_fs_info,
 								GFP_NOFS);
@@ -1256,7 +1275,7 @@
 			step->flags |= CACHEFS_BLOCK_NEW;
 		}
 		else if (step->page) {
-			ret = cachefs_block_set2(super,step->bix,step->page,NULL);
+			ret = cachefs_block_set2(super,step->bix,step->page,step->pageio,NULL);
 			if (ret<0)
 				break;
 		}
@@ -1268,10 +1287,10 @@
 		/* initiate or read the this block as appropriate */
 		if (!step->page) {
 			if (step->flags & CACHEFS_BLOCK_NEW) {
-				_debug("getting level %d block %u",pix,step->bix);
+				kdebug("getting level %d block %u",pix,step->bix);
 			}
 			else {
-				_debug("reading level %d block %u",pix,step->bix);
+				kdebug("reading level %d block %u",pix,step->bix);
 			}
 
 			ret = cachefs_block_read(super,NULL,step->bix,
@@ -1297,8 +1316,8 @@
 	if (ret<0) {
 		struct cachefs_block *block = xchg(&pageio->mapped_block,NULL);
 		if (block)
-			cachefs_block_put(inode->vfs_inode.i_sb->s_fs_info,block);
-		_leave(" = %d",ret);
+			cachefs_block_put(block);
+		kleave(" = %d",ret);
 		return ret;
 	}
 
@@ -1306,11 +1325,11 @@
 	if (path[0].flags & CACHEFS_BLOCK_NEW)
 		set_bit(CACHEFS_PAGE_NEW,&pageio->flags);
 
-	_debug("notboundary = %u",notboundary);
+	kdebug("notboundary = %u",notboundary);
 	if (!notboundary)
 		set_bit(CACHEFS_PAGE_BOUNDARY,&pageio->flags);
 
-	_leave(" = 0 [bix=%u %c%c]",
+	kleave(" = 0 [bix=%u %c%c]",
 	       pageio->mapped_block->bix,
 	       test_bit(CACHEFS_PAGE_BOUNDARY,&pageio->flags)	? 'b' : '-',
 	       test_bit(CACHEFS_PAGE_NEW,&pageio->flags)	? 'n' : '-'
@@ -1327,14 +1346,14 @@
 	struct address_space *mapping = inode->vfs_inode.i_mapping;
 	struct page *page;
 
-	_enter("{%lu},%u",inode->vfs_inode.i_ino,index);
+	kenter("{%lu},%u",inode->vfs_inode.i_ino,index);
 
 	*_page = NULL;
 
 	page = read_cache_page(mapping,index,(filler_t*)mapping->a_ops->readpage,NULL);
 	dbgpgalloc(page);
 	if (IS_ERR(page)) {
-		_leave(" = %ld [read failed]",PTR_ERR(page));
+		kleave(" = %ld [read failed]",PTR_ERR(page));
 		return PTR_ERR(page);
 	}
 
@@ -1346,13 +1365,13 @@
 		goto failed;
 
 	*_page = page;
-	_leave(" = 0 [page %p]",page);
+	kleave(" = 0 [page %p]",page);
 	return 0;
 
 failed:
 	dbgpgfree(page);
 	cachefs_put_page(page);
-	_leave(" = -EIO");
+	kleave(" = -EIO");
 	return -EIO;
 } /* end cachefs_get_page() */
 

Index: Makefile
===================================================================
RCS file: /home/cvs/afs/fs/cachefs/Makefile,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- Makefile	17 Apr 2003 11:59:04 -0000	1.15
+++ Makefile	23 May 2003 12:59:22 -0000	1.16
@@ -18,7 +18,8 @@
 	recycling.o \
 	rootdir.o \
 	status.o \
-	super.o
+	super.o \
+	vjournal.o
 
 obj-m  := cachefs.o
 




More information about the linux-afs-cvs mailing list