[RFC] LogFS

Jörn Engel joern at wohnheim.fh-wedel.de
Thu Aug 24 09:44:30 EDT 2006


For the last 16 months, I've been hacking on a small filesystem.  It
has progressed far enough that it shouldn't be a total embarrassment to
show the code, but still needs quite a bit of work.

Anyhow, in case people are interested to have a look... comments are
very welcome.

Jörn

-- 
...one more straw can't possibly matter...
-- Kirby Bakken

 fs/Kconfig           |   13 
 fs/Makefile          |    1 
 fs/logfs/CREDITS     |   18 
 fs/logfs/Makefile    |    9 
 fs/logfs/NAMES       |   32 +
 fs/logfs/dir.c       |  464 +++++++++++++++++++++++
 fs/logfs/file.c      |   82 ++++
 fs/logfs/gc.c        |  517 ++++++++++++++++++++++++++
 fs/logfs/inode.c     |  385 +++++++++++++++++++
 fs/logfs/journal.c   |  216 +++++++++++
 fs/logfs/logfs.h     |  431 ++++++++++++++++++++++
 fs/logfs/readwrite.c |  992 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/logfs/super.c     |  435 ++++++++++++++++++++++
 13 files changed, 3595 insertions(+)

--- logfs3/fs/Kconfig~logfs	2006-08-24 15:35:33.000000000 +0200
+++ logfs3/fs/Kconfig	2006-08-24 15:39:42.000000000 +0200
@@ -1230,6 +1230,19 @@ config JFFS2_CMODE_SIZE
 
 endchoice
 
+config LOGFS
+	tristate "Log Filesystem (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  Successor of JFFS2, using explicit filesystem hierarchy.
+	  Continuing with the long tradition of calling the filesystem
+	  exactly what it is not, LogFS is a journaled filesystem,
+	  while JFFS and JFFS2 were true log-structured filesystems.
+	  The hybrid structure of journaled filesystems promises to
+	  scale better to larger sizes.
+
+	  If unsure, say N.
+
 config CRAMFS
 	tristate "Compressed ROM file system support (cramfs)"
 	select ZLIB_INFLATE
--- logfs3/fs/Makefile~logfs	2006-08-24 15:35:33.000000000 +0200
+++ logfs3/fs/Makefile	2006-08-24 15:39:42.000000000 +0200
@@ -84,6 +84,7 @@ obj-$(CONFIG_UFS_FS)		+= ufs/
 obj-$(CONFIG_EFS_FS)		+= efs/
 obj-$(CONFIG_JFFS_FS)		+= jffs/
 obj-$(CONFIG_JFFS2_FS)		+= jffs2/
+obj-$(CONFIG_LOGFS)		+= logfs/
 obj-$(CONFIG_AFFS_FS)		+= affs/
 obj-$(CONFIG_ROMFS_FS)		+= romfs/
 obj-$(CONFIG_QNX4FS_FS)		+= qnx4/
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/CREDITS	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,18 @@
+Testing
+2005-2006	Joern Engel
+2006		Arnd Bergmann
+
+Design (in rough chronological order - and some may be unaware of the fact):
+1991		Mendel Rosenblum
+1991		John K. Ousterhout
+2005-2006	Joern Engel
+2005-2006	Arnd Bergmann
+2005		David Woodhouse
+2005-2006	Dirk Bolte
+2005		Carsten Otte
+2006		Martin Schwidefsky
+2006		Ferenc Havasi
+2006		Hubertus Franke
+
+Implementation:
+2005-2006	Joern Engel
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/NAMES	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,32 @@
+This filesystem started with the codename "Logfs", which was actually
+a joke at the time.  Logfs was to replace JFFS2, the journaling flash
+filesystem (version 2).  JFFS2 was actually a log structured
+filesystem in its purest form, so the name described just what it was
+not.  Logfs was planned as a journaling filesystem, so its name would
+be in the same tradition of non-description.
+
+Apart from the joke, "Logfs" was only intended as a codename, later to
+be replaced by something better.  Some ideas from various people were:
+logfs
+jffs3
+jefs
+engelfs
+poofs
+crapfs
+sweetfs
+cutefs
+dynamic journaling fs - djofs
+tfsfkal - the file system formerly known as logfs
+
+Later it turned out that while having a journal, Logfs has borrowed so
+many concepts from log structured filesystems that the name actually
+made some sense.
+
+Yet later, Arnd noticed that Logfs was to scale logarithmically with
+increasing flash sizes, where JFFS2 scales linearly.  What a nice
+coincidence.  Even better, its successor can be called Log2fs,
+emphasizing this point.
+
+So to this day, I still like "Logfs" and cannot come up with a better
+name.  And unless someone has a stroke of genius or there is
+massive opposition against this name, I'd like to just keep it.
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/Makefile	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,9 @@
+obj-$(CONFIG_LOGFS)	+= logfs.o
+
+logfs-y	+= dir.o
+logfs-y	+= file.o
+logfs-y	+= gc.o
+logfs-y	+= inode.o
+logfs-y	+= journal.o
+logfs-y	+= readwrite.o
+logfs-y	+= super.o
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/logfs.h	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,431 @@
+#ifndef logfs_h
+#define logfs_h
+
+#include <linux/kernel.h>
+#include <linux/kallsyms.h>
+
+/**
+ * Throughout the logfs code, we're constantly dealing with blocks at
+ * various positions or offsets.  To remove confusion, we strictly
+ * distinguish between a "position" - the logical position within a
+ * file and an "offset" - the physical location within the device.
+ *
+ * Any usage of the term offset for a logical location or position for
+ * a physical one is a bug and should get fixed.
+ */
+
+/**
+ * Blocks are allocated in one of several segments depending on their
+ * level.  The following levels are used:
+ * 0-1	- reserved
+ * 2	- gc recycled blocks
+ * 3	- new data blocks
+ * 4	- replacement blocks
+ * 5	- i1 indirect blocks
+ * 6	- i2 indirect blocks
+ * 7	- i3 indirect blocks
+ * 8	- i4 indirect blocks
+ * 9	- i5 indirect blocks
+ * 10	- ifile data blocks
+ * 11	- ifile i1 indirect blocks
+ * 12	- ifile i2 indirect blocks
+ * 13	- ifile i3 indirect blocks
+ * 14	- ifile i4 indirect blocks
+ * 15	- ifile i5 indirect blocks
+ *
+ * Levels 5-15 are necessary for robust gc operations and help separate
+ * short-lived metadata from longer-lived file data.  Further, file data
+ * is separated into several segments based on simple heuristics.  Old
+ * data recycled during gc operation is expected to be long-lived.  New
+ * data is of uncertain life expectancy.  New data used to replace older
+ * blocks in existing files is expected to be short-lived.
+ */
+
+
+typedef __be16 be16;
+typedef __be32 be32;
+typedef __be64 be64;
+
+typedef u64 pos_t;
+
+static inline pos_t be64_to_pos(const be64 p)
+{
+	return (__force pos_t) be64_to_cpu(p);
+}
+
+static inline be64 pos_to_be64(const pos_t p)
+{
+	return cpu_to_be64((__force u64) p);
+}
+
+#define packed __attribute__((__packed__))
+
+
+#undef TRACE
+#if 0
+#define TRACE() _TRACE()
+#else
+#define TRACE()
+#endif
+
+#define _TRACE() do {						\
+	printk("trace: %s:%d: ", __FILE__, __LINE__);		\
+	print_symbol("%s", (long)__builtin_return_address(0));	\
+	printk("->%s\n", __func__);				\
+} while(0)
+
+
+#define LOGFS_MAGIC 0xb21f205ac97e8168ull
+#define LOGFS_MAGIC_U32 0xc97e8168ull
+
+
+#define LOGFS_BLOCK_SECTORS	(8)
+#define LOGFS_BLOCK_BITS	(9)	/* 512 pointers, used for shifts */
+#define LOGFS_BLOCKSIZE		(4096ull)
+#define LOGFS_BLOCK_FACTOR (LOGFS_BLOCKSIZE / sizeof(u64))
+#define LOGFS_BLOCK_MASK (LOGFS_BLOCK_FACTOR-1)
+
+#define I0_BLOCKS	(4+16)
+#define I1_BLOCKS LOGFS_BLOCK_FACTOR
+#define I2_BLOCKS (LOGFS_BLOCK_FACTOR * I1_BLOCKS)
+#define I3_BLOCKS (LOGFS_BLOCK_FACTOR * I2_BLOCKS)
+#define I4_BLOCKS (LOGFS_BLOCK_FACTOR * I3_BLOCKS)
+#define I5_BLOCKS (LOGFS_BLOCK_FACTOR * I4_BLOCKS)
+
+#define I1_INDEX	(4+16)
+#define I2_INDEX	(5+16)
+#define I3_INDEX	(6+16)
+#define I4_INDEX	(7+16)
+#define I5_INDEX	(8+16)
+
+#define LOGFS_EMBEDDED_FIELDS	(9+16)
+
+#define LOGFS_EMBEDDED_SIZE	(LOGFS_EMBEDDED_FIELDS * sizeof(u64))
+#define LOGFS_I0_SIZE (I0_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I1_SIZE (I1_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I2_SIZE (I2_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I3_SIZE (I3_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I4_SIZE (I4_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I5_SIZE (I5_BLOCKS * LOGFS_BLOCKSIZE)
+
+#define LOGFS_MAX_INDIRECT	(5)
+#define LOGFS_MAX_LEVELS	(3)
+//#define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)
+#define LOGFS_SEGMENTS		(2 * LOGFS_MAX_LEVELS)
+
+
+struct logfs_disk_super {
+	be64	ds_magic;
+	be32	ds_segment_size;
+	be32	ds_block_size;
+
+	be64	ds_journal_ofs;
+	be64	ds_journal_len;
+
+	u8	reserved;
+	u8	ds_ifile_levels;	/* max level of ifile */
+	u8	ds_iblock_levels;	/* max level of regular files */
+	u8	ds_data_levels;		/* number of segments to leaf blocks */
+	be64	ds_root_reserve;
+
+	be64	ds_filesystem_size;
+	be64	ds_feature_incompat;
+
+	be64	ds_feature_ro_compat;
+	be64	ds_feature_compat;
+
+	be32	ds_anchor_size;
+	be32	ds_sum_start;
+}packed;
+
+
+#define LOGFS_IF_VALID		0x00000001 /* inode exists */
+#define LOGFS_IF_EMBEDDED	0x00000002 /* data embedded in block pointers */
+#define LOGFS_IF_INVALID	0x80000000 /* inode does not exist */
+struct logfs_disk_inode {
+	be16	di_mode;
+	be16	di_pad;
+	be32	di_flags;
+	be32	di_uid;
+	be32	di_gid;
+
+	be64	di_ctime;
+	be64	di_mtime;
+
+	be32	di_refcount;
+	be32	di_generation;	/* for nfs file handles */
+	be64	di_blocks;
+
+	be64	di_size;
+	be64	di_data[LOGFS_EMBEDDED_FIELDS];
+}packed;
+
+
+#define LOGFS_MAX_NAMELEN 245
+struct logfs_disk_dentry {
+	be64	ino;		/* inode pointer */
+	be16	namelen;
+	u8	type;
+	u8	name[LOGFS_MAX_NAMELEN];
+}packed;
+
+
+struct logfs_disk_block {
+	be64	ino;
+	be64	pos;
+}packed;
+
+
+struct logfs_disk_sum {
+	/* footer */
+	be32	erase_count;
+	u8	level;
+	u8	pad[3];
+	union {
+		be64	ofs;
+		be64	gec;
+	};
+	struct logfs_disk_block blocks[0];
+}packed;
+
+
+struct logfs_anchor {
+	be64	da_maxec;	/* maximal erase count */
+	be64	da_gec;		/* global erase count */
+
+	be64	da_sweeper;	/* current position of gc "sweeper" */
+	be64	da_last_ino;
+
+	be64	da_size;
+	be64	da_data[LOGFS_EMBEDDED_FIELDS];
+}packed;
+
+
+enum {
+	JE_ANCHOR	= 1,
+	JE_SPILLOUT	= 2,
+};
+
+
+struct logfs_journal_entry {
+	be16	je_type;
+	be16	pad0;
+	be32	pad1;
+	be64	je_version;
+	union {
+		struct logfs_anchor da;
+	};
+}packed;
+
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+
+#define LOGFS_SUPER(sb) ((struct logfs_super*)(sb->s_fs_info))
+#define LOGFS_INODE(inode) container_of(inode, struct logfs_inode, vfs_inode)
+
+
+			/*	0	reserved for gc markers */
+#define LOGFS_INO_MASTER	1	/* inode file */
+#define LOGFS_INO_ROOT		2	/* root directory */
+#define LOGFS_INO_JOURNAL	3	/* journal */
+#define LOGFS_INO_ATIME		4	/* atime for all inodes */
+#define LOGFS_INO_BAD_BLOCKS	5	/* bad blocks */
+#define LOGFS_INO_OBSOLETE	6	/* obsolete block count */
+#define LOGFS_INO_ERASE_COUNT	7	/* erase count */
+#define LOGFS_RESERVED_INOS	16
+
+
+struct logfs_block {
+	u64	ino;
+	pos_t	pos;
+};
+
+
+struct logfs_super;
+struct logfs_segment {
+	struct logfs_super *super;
+	int	active;
+	u64	ofs;
+	u16	cur_block;
+	/* footer */
+	u32	erase_count;
+	u8	level;
+	struct logfs_block blocks[0];
+};
+
+
+struct logfs_super {
+	struct super_block *s_sb;		/* should get removed... */
+	struct mtd_info	*s_mtd;			/* underlying device */
+	struct inode	*s_master_inode;	/* ifile */
+	/* gc.c fields */
+	long	 s_segsize;			/* size of a segment */
+	long	 s_no_segs;			/* segments on device */
+	long	 s_blocksize;			/* size of a block */
+	long	 s_no_blocks;			/* blocks per segment */
+	u64	 s_last;			/* position of last used seg */
+	u64	 s_size;			/* filesystem size */
+	void	*s_gc_buf;			/* copy buf for cleansing */
+	struct logfs_segment *s_segs[LOGFS_SEGMENTS];	/* segment array */
+	u8	*s_valid_count;			/* # of valid block per seg */
+	long	 s_no_free_segs;		/* # of free segments */
+	u64	 s_maxec;			/* max erase count */
+	u64	 s_gec;				/* global erase count */
+	u64	 s_sweeper;			/* current sweeper pos */
+	u8	 s_ifile_levels;		/* max level of ifile */
+	u8	 s_iblock_levels;		/* max level of regular files */
+	u8	 s_data_levels;			/* # of segments to leaf block*/
+	u8	 s_total_levels;		/* sum of above three */
+	/* inode.c fields */
+	spinlock_t s_ino_lock;			/* lock s_last_ino on 32bit */
+	u64	 s_last_ino;			/* highest ino used */
+	struct inode *s_write_inode;		/* inode currently written */
+	struct mutex s_write_inode_mutex;	/* only one deletion at once */
+	/* journal.c fields */
+	struct mutex s_log_sem;
+	struct logfs_journal_entry *s_je;	/* new journal entry */
+	u64	 s_log_ofs;
+	u64	 s_log_len;
+	u64	 s_last_version;
+	u64	 s_anchor_ofs;
+	u32	 s_anchor_size;			/* size of anchor */
+	u32	 s_sum_start;			/* start of summary in anchor */
+	/* readwrite.c fields */
+	struct mutex s_r_sem;
+	struct mutex s_w_sem;
+	be64	*s_rblock;
+	be64	*s_wblock[LOGFS_MAX_INDIRECT+1];
+	u64	 s_free;			/* number of free blocks */
+	u64	 s_gc_reserve;
+	u64	 s_root_reserve;
+};
+
+
+struct logfs_inode {
+	struct inode vfs_inode;
+	u64 li_data[LOGFS_EMBEDDED_FIELDS];
+	u32 li_flags;
+	u64 li_blocks;
+};
+
+
+static inline size_t logfs_summary_size(struct logfs_super *super)
+{
+	return super->s_no_blocks * sizeof(struct logfs_block);
+}
+static inline size_t logfs_segstruct_size(struct logfs_super *super)
+{
+	return sizeof(struct logfs_segment) + logfs_summary_size(super);
+}
+
+
+static inline pgoff_t logfs_index(u64 pos)
+{
+	return pos / LOGFS_BLOCKSIZE;
+}
+
+
+static inline struct inode *logfs_iget(struct super_block *sb, ino_t ino)
+{
+	struct logfs_super *super = LOGFS_SUPER(sb);
+	if (ino == LOGFS_INO_MASTER) /* never iget this "inode"! */
+		return super->s_master_inode;
+	return iget(sb, ino);
+}
+static inline void logfs_iput(struct inode *inode)
+{
+	if (inode->i_ino == LOGFS_INO_MASTER) /* never iput it either! */
+		return;
+	iput(inode);
+}
+
+
+static inline struct logfs_disk_sum *alloc_disk_sum(struct logfs_super *super)
+{
+	return kmalloc(logfs_summary_size(super), GFP_ATOMIC);
+}
+static inline void free_disk_sum(struct logfs_disk_sum *sum)
+{
+	kfree(sum);
+}
+
+
+/* dir.c */
+extern struct inode_operations logfs_dir_iops;
+extern struct file_operations logfs_dir_fops;
+
+
+/* file.c */
+extern struct inode_operations logfs_reg_iops;
+extern struct file_operations logfs_reg_fops;
+extern struct address_space_operations logfs_reg_aops;
+
+int logfs_setattr(struct dentry *dentry, struct iattr *iattr);
+
+
+/* gc.c */
+void logfs_gc_pass(struct logfs_super *super);
+s64 logfs_get_free_block(struct logfs_super*super, int level, u64 ino, pos_t pos);
+int logfs_init_gc(struct logfs_super *super);
+void logfs_cleanup_gc(struct logfs_super *super);
+/* still gc.c, but should get moved to segment.c */
+int logfs_erase_segment(struct logfs_super *super, u64 ofs);
+
+void logfs_disk_to_sum(struct logfs_super *super, struct logfs_segment *seg,
+		struct logfs_disk_sum *sum);
+void logfs_sum_to_disk(struct logfs_super *super, struct logfs_segment *seg,
+		struct logfs_disk_sum *sum);
+int logfs_init_segments(struct logfs_super *super);
+void logfs_cleanup_segments(struct logfs_super *super);
+
+
+/* inode.c */
+extern struct super_operations logfs_super_operations;
+
+struct inode *logfs_new_inode(struct inode *dir, int mode);
+struct inode *logfs_new_master_inode(struct super_block *sb);
+int logfs_init_inode_cache(void);
+void logfs_destroy_inode_cache(void);
+
+
+/* journal.c */
+int logfs_write_anchor(struct inode *inode);
+int logfs_init_log(struct super_block *sb);
+void logfs_cleanup_log(struct super_block *sb);
+
+
+/* readwrite.c */
+int logfs_inode_read(struct inode *inode, void *buf, size_t n, loff_t _pos);
+int logfs_inode_write_nolock(struct inode *inode, const void *buf, size_t n,
+		loff_t _pos);
+int logfs_inode_write(struct inode *inode, const void *buf, size_t n,
+		loff_t pos);
+int logfs_inode_write_loop(struct inode *inode, const void *buf, size_t n,
+		loff_t _pos);
+
+int logfs_readpage(struct file *file, struct page *page);
+int logfs_write_buf(struct inode *inode, pgoff_t index, void *buf);
+int logfs_rewrite_block(struct inode *inode, pgoff_t index, u64 ofs, void *buf);
+int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 pos);
+void logfs_truncate(struct inode *inode);
+
+int logfs_init_rw(struct logfs_super *super);
+void logfs_cleanup_rw(struct logfs_super *super);
+void logfs_set_blocks(struct inode *inode, u64 no);
+
+/* super.c */
+int mtderase(struct mtd_info *mtd, loff_t ofs, size_t len);
+int mtdread(struct mtd_info *mtd, loff_t ofs, size_t len, void *buf);
+int mtdwrite(struct mtd_info *mtd, loff_t ofs, size_t len, void *buf);
+int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
+
+
+/* symlink.c */
+
+
+#define EOF	256
+
+
+#endif
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/dir.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,464 @@
+#include <linux/fs.h>
+#include <linux/mtd/mtd.h>
+
+#include "logfs.h"
+
+static inline u8 logfs_type(struct inode *inode)
+{
+	return (inode->i_mode >> 12) & 15;
+}
+
+
+static inline void logfs_inc_count(struct inode *inode)
+{
+	inode->i_nlink++;
+	mark_inode_dirty(inode);
+}
+
+
+static inline void logfs_dec_count(struct inode *inode)
+{
+	inode->i_nlink--;
+	mark_inode_dirty(inode);
+}
+
+
+typedef int (*dir_callback)(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t pos);
+
+
+static int __logfs_dir_walk(struct inode *dir, struct dentry *dentry,
+		dir_callback handler, struct logfs_disk_dentry *dd, loff_t *pos)
+{
+	struct qstr *name = dentry ? &dentry->d_name : NULL;
+	int ret;
+
+	TRACE();
+	BUG_ON(sizeof(*dd) != 256);
+	BUG_ON(i_size_read(dir) % sizeof(*dd));
+	for (; ; (*pos)++) {
+		ret = logfs_inode_read(dir, dd, sizeof(*dd),(*pos)*sizeof(*dd));
+		if (ret == -EOF)
+			return 0;
+		if (ret)
+			return ret;
+		if (be16_to_cpu(dd->namelen) == 0)
+			continue;
+
+		if (name) {
+			if (name->len != be16_to_cpu(dd->namelen))
+				continue;
+			if (memcmp(name->name, dd->name, name->len))
+				continue;
+		}
+
+		return handler(dir, dentry, dd, *pos);
+	}
+	return ret;
+}
+
+
+static int logfs_dir_walk(struct inode *dir, struct dentry *dentry,
+		dir_callback handler)
+{
+	struct logfs_disk_dentry dd;
+	loff_t pos = 0;
+	return __logfs_dir_walk(dir, dentry, handler, &dd, &pos);
+}
+
+
+static int logfs_lookup_handler(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t pos)
+{
+	struct inode *inode;
+
+	TRACE();
+	inode = iget(dir->i_sb, be64_to_cpu(dd->ino));
+	if (!inode)
+		return -EIO;
+	return PTR_ERR(d_splice_alias(inode, dentry));
+}
+
+
+/* Look up dentry in dir.  Over-long names are rejected with -ENAMETOOLONG. */
+static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	TRACE();
+	if (dentry->d_name.len > LOGFS_MAX_NAMELEN) /* else create hits BUG_ON */
+		return ERR_PTR(-ENAMETOOLONG);
+	return ERR_PTR(logfs_dir_walk(dir, dentry, logfs_lookup_handler));
+}
+
+
+/* unlink currently only makes the name length zero */
+static int logfs_unlink_handler(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t pos)
+{
+	TRACE();
+	dd->namelen = 0;
+	return logfs_inode_write(dir, dd, sizeof(*dd), pos*sizeof(*dd));
+}
+
+
+static void logfs_post_unlink(struct inode *dir, struct inode *inode)
+{
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+	logfs_dec_count(inode);
+	printk("unlink: %lx, %x\n", inode->i_ino, inode->i_nlink);
+	mark_inode_dirty(dir);
+}
+
+
+static int logfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int ret;
+
+	TRACE();
+	ret = logfs_dir_walk(dir, dentry, logfs_unlink_handler);
+	if (ret)
+		return ret;
+
+	logfs_post_unlink(dir, inode);
+	return 0;
+}
+
+
+static int logfs_empty_handler(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t pos)
+{
+	TRACE();
+	return -ENOTEMPTY;
+}
+static inline int logfs_empty_dir(struct inode *dir)
+{
+	return logfs_dir_walk(dir, NULL, logfs_empty_handler) == 0;
+}
+
+
+static void logfs_post_rmdir(struct inode *dir, struct inode *inode)
+{
+	inode->i_size = 0; /* FIXME: why this? */
+	logfs_dec_count(inode);
+	logfs_dec_count(dir);
+}
+
+
+static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	TRACE();
+	if (!logfs_empty_dir(inode))
+		return -ENOTEMPTY;
+
+	err = logfs_unlink(dir, dentry);
+	if (err)
+		return err;
+
+	logfs_post_rmdir(dir, inode);
+	return 0;
+}
+
+
+/* FIXME: readdir currently has its own dir_walk code.  I don't see a good
+ * way to combine the two copies */
+#define IMPLICIT_NODES 2
+static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
+{
+	struct logfs_disk_dentry dd;
+	loff_t pos = file->f_pos - IMPLICIT_NODES;
+	int err;
+
+	TRACE();
+	BUG_ON(pos<0);
+	for (;; pos++) {
+		struct inode *dir = file->f_dentry->d_inode;
+		err = logfs_inode_read(dir, &dd, sizeof(dd), pos*sizeof(dd));
+		if (err == -EOF)
+			break;
+		if (err)
+			return err;
+
+		/* zero-length indicates deleted dentries */
+		if (be16_to_cpu(dd.namelen) == 0)
+			continue;
+
+		if (filldir(buf, dd.name, be16_to_cpu(dd.namelen), pos,
+					be64_to_cpu(dd.ino), dd.type))
+			break;
+	}
+
+	file->f_pos = pos + IMPLICIT_NODES;
+	return 0;
+}
+
+
+static int logfs_readdir(struct file *file, void *buf, filldir_t filldir)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+
+	TRACE();
+	if (file->f_pos < 0)
+		return -EINVAL;
+
+	if (file->f_pos == 0) {
+		if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0)
+			return 0;
+		file->f_pos++;
+	}
+	if (file->f_pos == 1) {
+		ino_t pino = parent_ino(file->f_dentry);
+		if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0)
+			return 0;
+		file->f_pos++;
+	}
+
+	return __logfs_readdir(file, buf, filldir);
+}
+
+
+static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
+		struct inode *inode)
+{
+	struct logfs_disk_dentry dd;
+	int err;
+
+	memset(&dd, 0, sizeof(dd));
+	dd.ino = cpu_to_be64(inode->i_ino);
+	dd.namelen = cpu_to_be16(dentry->d_name.len);
+	BUG_ON(dentry->d_name.len > LOGFS_MAX_NAMELEN);
+	memcpy(dd.name, dentry->d_name.name, dentry->d_name.len);
+	dd.type = logfs_type(inode);
+	err = logfs_inode_write(dir, &dd, sizeof(dd), i_size_read(dir));
+	if (err) {
+		logfs_dec_count(inode);
+		iput(inode);
+		return err;
+	}
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+
+/* Link limit, 2**30.  FIXME: should really be somewhere in the 64bit area. */
+#define LOGFS_LINK_MAX (1 << 30)
+/* Create a directory named by dentry inside dir.  Returns 0 or -errno. */
+static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *inode;
+	int ret;
+
+	TRACE();
+	if (dir->i_nlink >= LOGFS_LINK_MAX)
+		return -EMLINK;
+
+	logfs_inc_count(dir);
+
+	/* FIXME: why do we have to fill in S_IFDIR, while the mode is
+	 * correct for mknod, creat, etc.? */
+	inode = logfs_new_inode(dir, S_IFDIR | mode);
+	if (IS_ERR(inode)) {
+		logfs_dec_count(dir);
+		return PTR_ERR(inode);
+	}
+
+	inode->i_op = &logfs_dir_iops;
+	inode->i_fop = &logfs_dir_fops;
+
+	ret = logfs_write_dir(dir, dentry, inode);
+	if (ret) {
+		/* logfs_write_dir() already dropped the inode's link and
+		 * reference on failure; touching the inode again would be a
+		 * double iput().  Only undo the parent's count here. */
+		logfs_dec_count(dir);
+		return ret;
+	}
+	logfs_inc_count(inode);	/* second link, taken once the entry exists */
+	return 0;
+}
+
+
+static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd)
+{
+	struct inode *inode;
+
+	TRACE();
+	inode = logfs_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &logfs_reg_iops;
+	inode->i_fop = &logfs_reg_fops;
+	inode->i_mapping->a_ops = &logfs_reg_aops;
+	mark_inode_dirty(inode);
+
+	return logfs_write_dir(dir, dentry, inode);
+}
+
+
+static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+		dev_t rdev)
+{
+	struct inode *inode;
+	int ret;
+
+	TRACE();
+	BUG_ON(dentry->d_name.len > LOGFS_MAX_NAMELEN);
+
+	inode = logfs_new_inode(dir, mode);
+	ret = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out;
+
+	init_special_inode(inode, mode, rdev);
+	mark_inode_dirty(inode);
+
+	ret = logfs_write_dir(dir, dentry, inode);
+out:
+	return ret;
+}
+
+
+static struct inode_operations ext2_symlink_iops = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+};
+
+
+static int logfs_symlink(struct inode *dir, struct dentry *dentry,
+		const char *target)
+{
+	size_t len = strlen(target) + 1;
+	struct inode *inode;
+	int ret;
+
+	inode = logfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &ext2_symlink_iops;
+	inode->i_mapping->a_ops = &logfs_reg_aops;
+
+	ret = logfs_inode_write_loop(inode, target, len, 0);
+	if (ret) {
+		logfs_dec_count(inode);
+		iput(inode);
+		return ret;
+	}
+	return logfs_write_dir(dir, dentry, inode);
+}
+
+
+static int logfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	return generic_permission(inode, mask, NULL);
+}
+
+
+static int logfs_link(struct dentry *old_dentry, struct inode *dir,
+		struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+
+	TRACE();
+	if (inode->i_nlink >= LOGFS_LINK_MAX)
+		return -EMLINK;
+
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+	logfs_inc_count(inode);
+	atomic_inc(&inode->i_count);
+
+	return logfs_write_dir(dir, dentry, inode);
+}
+
+
+static int logfs_nop_handler(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t pos)
+{
+	return 0;
+}
+static inline int logfs_get_dd(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t *pos)
+{
+	return __logfs_dir_walk(dir, dentry, logfs_nop_handler, dd, pos);
+}
+
+
+/* Rename old_dentry to new_dentry, replacing an existing target if present. */
+static int logfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	int isdir = S_ISDIR(old_inode->i_mode);
+	int err;
+
+	TRACE();
+	/* FIXME: request sync semaphore */
+	if (new_inode) { /* replace */
+		struct logfs_disk_dentry new_dd;
+		loff_t new_pos = 0;
+
+		BUG_ON(isdir && !S_ISDIR(new_inode->i_mode));
+		if (isdir && !logfs_empty_dir(new_inode))
+			return -ENOTEMPTY;
+
+		err = logfs_get_dd(new_dir, new_dentry, &new_dd, &new_pos);
+		BUG_ON(err); /* target should exist */
+
+		/* replace old dd */
+		new_dd.ino = cpu_to_be64(old_inode->i_ino);
+		new_dd.type = logfs_type(old_inode);
+		err = logfs_inode_write(new_dir, &new_dd, sizeof(new_dd),
+				new_pos*sizeof(new_dd));
+		if (err)
+			return err;
+
+		logfs_post_unlink(new_dir, new_inode);
+		logfs_inc_count(old_inode);	/* logfs_unlink() drops it again */
+		err = logfs_unlink(old_dir, old_dentry);
+		BUG_ON(err); /* FIXME: we need to clean things up instead */
+
+		/* old_inode moved to new_dir: drop new_inode and old_dir's link */
+		if (isdir)
+			logfs_post_rmdir(old_dir, new_inode);
+
+		return err;
+	} else { /* just a move */
+		err = logfs_link(old_dentry, new_dir, new_dentry);
+		if (err)
+			return err;
+
+		/* Drop the old name only.  logfs_rmdir() must not be used for
+		 * a moved directory: it refuses non-empty directories and
+		 * logfs_post_rmdir() zeroes the i_size of its inode. */
+		err = logfs_unlink(old_dir, old_dentry);
+		if (!err && isdir) {
+			logfs_inc_count(new_dir);
+			logfs_dec_count(old_dir);
+		}
+		return err;
+	}
+}
+
+
+struct inode_operations logfs_dir_iops = {
+	.create		= logfs_create,
+	.link		= logfs_link,
+	.lookup		= logfs_lookup,
+	.mkdir		= logfs_mkdir,
+	.mknod		= logfs_mknod,
+	.rename		= logfs_rename,
+	.rmdir		= logfs_rmdir,
+	.permission	= logfs_permission,
+	.symlink	= logfs_symlink,
+	.unlink		= logfs_unlink,
+};
+struct file_operations logfs_dir_fops = {
+	.readdir	= logfs_readdir,
+	.read		= generic_read_dir,
+};
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/file.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,82 @@
+#include <linux/mtd/mtd.h>
+#include <linux/pagemap.h>
+
+#include "logfs.h"
+
+
+static int logfs_prepare_write(struct file *file, struct page *page,
+               unsigned start, unsigned end)
+{
+       void *buf;
+
+       TRACE();
+       buf = kmap(page);
+       if (!PageUptodate(page)) {
+	       /* FIXME: shouldn't we read instead? */
+               memset(buf, 0, PAGE_CACHE_SIZE);
+               SetPageUptodate(page);
+       }
+
+       //set_page_dirty(page);
+       kunmap(page);
+
+       return 0;
+}
+
+
+/* Write one page back; size math is done in loff_t as pgoff_t may be 32bit. */
+static int logfs_commit_write(struct file *file, struct page *page,
+		unsigned start, unsigned end)
+{
+	struct inode *inode = page->mapping->host;
+	pgoff_t index = page->index;
+	loff_t new_size = ((loff_t)index << PAGE_CACHE_SHIFT) + end;
+	void *buf;
+	int ret;
+
+	TRACE();
+	pr_debug("ino: %lu, page:%lu, start: %d, len:%d\n", inode->i_ino,
+			page->index, start, end-start);
+	BUG_ON(PAGE_CACHE_SIZE != LOGFS_BLOCKSIZE);
+	BUG_ON(page->index > I3_BLOCKS);
+	if (start == end)
+		return 0; /* FIXME: do we need to update inode? */
+	if (i_size_read(inode) < new_size) {
+		i_size_write(inode, new_size);
+		mark_inode_dirty(inode);
+	}
+
+	buf = kmap(page);
+	ret = logfs_write_buf(inode, index, buf);
+	kunmap(page);
+	return ret;
+}
+
+
+static int logfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	BUG();
+	return 0;
+}
+
+
+struct inode_operations logfs_reg_iops = {
+	.truncate	= logfs_truncate,
+};
+
+
+struct file_operations logfs_reg_fops = {
+	.llseek		= generic_file_llseek,
+	.open		= generic_file_open,
+	.read		= generic_file_read,
+	.write		= generic_file_write,
+};
+
+
+struct address_space_operations logfs_reg_aops = {
+	.commit_write	= logfs_commit_write,
+	.prepare_write	= logfs_prepare_write,
+	.readpage	= logfs_readpage,
+	.set_page_dirty	= __set_page_dirty_nobuffers,
+	.writepage	= logfs_writepage,
+};
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/gc.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,517 @@
+#include <linux/fs.h>
+#include <linux/mtd/mtd.h>
+
+#include "logfs.h"
+
+
+/*
+ * Validity check as used by garbage collection.  Returns nonzero if the
+ * block at ofs, recorded in a segment summary as (ino, pos), is still valid.
+ */
+static int logfs_is_valid_block_gc(struct super_block *sb, u64 ofs, u64 ino,
+		pos_t pos)
+{
+	/* Umount closes a segment with free blocks remaining.  Those
+	 * blocks carry an all-ones ino and are by definition invalid. */
+	if (ino == -1)
+		return 0;
+
+	/* Journal blocks are by definition valid.  They will get
+	 * erased from journal.c sooner or later.  Everything else is
+	 * decided by the regular check. */
+	return (ino == LOGFS_INO_JOURNAL)
+		? 1 : logfs_is_valid_block(sb, ofs, ino, pos);
+}
+
+
+/*
+ * Count the valid blocks of the segment starting at seg_offset.
+ *
+ * The segment at offset 0, segments inside the journal area
+ * [s_log_ofs, s_log_ofs + s_log_len) and currently active segments are
+ * reported as s_no_blocks-1 without reading the medium.  For all others the
+ * summary is read from block s_no_blocks-1 of the segment and every entry
+ * accepted by logfs_is_valid_block_gc() is counted.
+ *
+ * A failed summary read ends in BUG_ON().
+ */
+static int logfs_valid_count(struct logfs_super *super, u64 seg_offset)
+{
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_disk_sum *sum;
+	int end = super->s_no_blocks-1;
+	u64 footer = seg_offset + end * super->s_blocksize;
+	int valid = 0;
+	int i, err;
+
+	/* Superblock.  Don't touch, or else... */
+	if (seg_offset == 0)
+		return end;
+	/* Primary journal segments.  Don't touch, or else... */
+	if ((seg_offset >= super->s_log_ofs)
+			&& (seg_offset - super->s_log_ofs < super->s_log_len))
+		return end;
+
+	/* Currently open segments */
+	for (i=0; i<LOGFS_SEGMENTS; i++) {
+		struct logfs_segment *seg = super->s_segs[i];
+		if (seg->active && (seg->ofs == seg_offset))
+			return end;
+	}
+
+	/* NOTE(review): the result of alloc_disk_sum() is used unchecked */
+	sum = alloc_disk_sum(super);
+	err = mtdread(mtd, footer, logfs_summary_size(super), sum);
+	BUG_ON(err);
+
+	/* one summary entry per data block; block "end" is the summary */
+	for (i=0; i<end; i++) {
+		u64 ino = be64_to_cpu(sum->blocks[i].ino);
+		u64 ofs = seg_offset + i*super->s_blocksize;
+		pos_t pos = be64_to_pos(sum->blocks[i].pos);
+		if (logfs_is_valid_block_gc(super->s_sb, ofs, ino, pos))
+			valid++;
+		//printk("%6llx, %4llx, %6llx, %x\n", ofs, ino, pos, valid);
+	}
+	free_disk_sum(sum);
+	return valid;
+}
+
+
+/*
+ * Erase the segment starting at ofs in steps of one block and bump the
+ * global erase counter s_gec (before the first erase is attempted).
+ *
+ * Returns 0 on success, -EIO as soon as one mtderase() call fails.
+ */
+int logfs_erase_segment(struct logfs_super *super, u64 ofs)
+{
+	u64 end = ofs + super->s_segsize;
+	long step = super->s_blocksize;
+
+	super->s_gec++;
+
+	while (ofs < end) {
+		if (mtderase(super->s_mtd, ofs, step))
+			return -EIO;
+		ofs += step;
+	}
+	return 0;
+}
+
+
+/*
+ * Rebuild the cached per-segment valid counts (s_valid_count) and the
+ * number of free segments (s_no_free_segs) by scanning every segment.
+ * A segment counts as free when it has no valid block.
+ */
+static void scan_valid_segments(struct logfs_super *super)
+{
+	int segno = 0;
+
+	super->s_no_free_segs = 0;
+	while (segno < super->s_no_segs) {
+		int count = logfs_valid_count(super,
+				(u64)segno * super->s_segsize);
+
+		super->s_valid_count[segno] = count;
+		if (!count)
+			super->s_no_free_segs++;
+		segno++;
+	}
+}
+
+
+/*
+ * Return the level of the segment starting at seg_offset.
+ *
+ * For a currently active segment the in-memory value is used, otherwise the
+ * level is taken from the summary read from block s_no_blocks-1 of the
+ * segment.  A failed read ends in BUG_ON().
+ */
+static int logfs_level(struct logfs_super *super, u64 seg_offset)
+{
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_disk_sum *sum;
+	int end = super->s_no_blocks-1;
+	u64 footer = seg_offset + end * super->s_blocksize;
+	int i, level, err;
+
+	/* Currently open segments */
+	for (i=0; i<LOGFS_SEGMENTS; i++) {
+		struct logfs_segment *seg = super->s_segs[i];
+		if (seg->active && (seg->ofs == seg_offset))
+			return seg->level;
+	}
+
+	sum = alloc_disk_sum(super);
+	err = mtdread(mtd, footer, logfs_summary_size(super), sum);
+	BUG_ON(err);
+
+	level = sum->level;
+	free_disk_sum(sum);
+	return level;
+}
+
+
+/*
+ * Debug helper: print one record per segment, two records per line.
+ * Each record shows the segment offset, its level, the cached valid count
+ * and a freshly computed valid count.
+ *
+ * With an odd number of segments the last line is left without a
+ * terminating newline.
+ */
+static void dump_segments(struct logfs_super *super)
+{
+	int i;
+	int valid;
+
+	//return;
+	for (i=0; i<super->s_no_segs; i++) {
+		u64 ofs = (u64)i * super->s_segsize;
+
+		valid = logfs_valid_count(super, ofs);
+		printk("%5llx, %3d, %2d, %2d ", ofs, logfs_level(super, ofs),
+				super->s_valid_count[i], valid);
+		if (i & 1)
+			printk("\n");
+	}
+}
+
+
+/*
+ * Handle one valid block of a segment that is being garbage collected.
+ *
+ * Looks up inode ino, calls logfs_rewrite_block() for the block index
+ * derived from pos and the old offset ofs (presumably this moves the block
+ * to a new location - confirm in readwrite.c), then drops the inode again.
+ * A missing inode or a failed rewrite ends in BUG_ON().
+ */
+static void logfs_cleanse_block(struct logfs_super *super, u64 ofs, u64 ino,
+		pos_t pos)
+{
+	struct super_block *sb = super->s_sb;
+	struct inode *inode;
+	int err;
+
+	inode = logfs_iget(sb, ino);
+	BUG_ON(!inode);
+	err = logfs_rewrite_block(inode, logfs_index(pos), ofs, NULL);
+	BUG_ON(err);
+	logfs_iput(inode);
+}
+
+
+/*
+ * Garbage collect the segment starting at seg_ofs without any sanity
+ * checks: read its summary and call logfs_cleanse_block() for every block
+ * that logfs_is_valid_block_gc() still considers valid.
+ *
+ * A failed summary read ends in BUG_ON().
+ */
+static void __logfs_gc_segment(struct logfs_super *super, u64 seg_ofs)
+{
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_disk_sum *sum;
+	int end = super->s_no_blocks-1;
+	u64 footer = seg_ofs + end * super->s_blocksize;
+	int i, err;
+
+	sum = alloc_disk_sum(super);
+	err = mtdread(mtd, footer, logfs_summary_size(super), sum);
+	BUG_ON(err);
+
+	for (i=0; i<end; i++) {
+		u64 ino = be64_to_cpu(sum->blocks[i].ino);
+		u64 ofs = seg_ofs + i*super->s_blocksize;
+		pos_t pos = be64_to_pos(sum->blocks[i].pos);
+		if (! logfs_is_valid_block_gc(super->s_sb, ofs, ino, pos))
+			continue;
+
+		logfs_cleanse_block(super, ofs, ino, pos);
+	}
+	free_disk_sum(sum);
+}
+
+
+/*
+ * Garbage collect the segment starting at seg_ofs.
+ *
+ * The caller must not pass the superblock segment, a journal segment or a
+ * currently active segment; each of those ends in BUG_ON().
+ */
+static void logfs_gc_segment(struct logfs_super *super, u64 seg_ofs)
+{
+	int i;
+
+	/* Superblock.  Don't touch, or else... */
+	BUG_ON(seg_ofs == 0);
+	/* Primary journal segments.  Don't touch, or else... */
+	BUG_ON((seg_ofs >= super->s_log_ofs)
+			&& (seg_ofs - super->s_log_ofs < super->s_log_len));
+
+	/* Currently open segments */
+	for (i=0; i<LOGFS_SEGMENTS; i++) {
+		struct logfs_segment *seg = super->s_segs[i];
+		BUG_ON(seg->active && (seg->ofs == seg_ofs));
+	}
+	__logfs_gc_segment(super, seg_ofs);
+}
+
+
+/*
+ * Garbage collect a single segment.
+ *
+ * Starting with the segment behind s_sweeper, every segment except the
+ * superblock segment 0 is looked at once, wrapping around at the end of the
+ * device.  Segments with a cached valid count of 0 (free) or of at least
+ * s_no_blocks-1 (full, or one of the "don't touch" segments) are skipped.
+ * The first remaining segment is collected, accounted as free and
+ * remembered in s_sweeper.
+ *
+ * BUG()s if no segment qualifies.
+ */
+static void logfs_gc_once(struct logfs_super *super)
+{
+	int end = super->s_no_blocks-1;
+	int i = super->s_sweeper;
+	int tries;
+
+	/* Bound the walk by the number of candidates (segments
+	 * 1..s_no_segs-1), so it terminates wherever s_sweeper points. */
+	for (tries = 1; tries < super->s_no_segs; tries++) {
+		u64 ofs;
+		int valid;
+
+		i++;
+		if (i >= super->s_no_segs)
+			i=1;	/* skip superblock */
+
+		ofs = (u64)i * super->s_segsize;
+		valid = super->s_valid_count[i];
+
+		if (valid == 0)
+			continue;
+		if (valid > end-1)
+			continue;
+		printk("candidate: %5llx\n", ofs);
+		logfs_gc_segment(super, ofs);
+		super->s_valid_count[i] = 0;
+		super->s_no_free_segs++;
+		super->s_sweeper = i;
+		return;
+	}
+	BUG();
+}
+
+
+/*
+ * Run garbage collection until at least s_total_levels segments are free.
+ *
+ * Each round first trusts the cached free count, then rescans all segments
+ * and only collects a segment if the rescan still shows too few free ones.
+ * More than 101 rounds end in BUG_ON().
+ */
+void logfs_gc_pass(struct logfs_super *super)
+{
+	u8 reserve = super->s_total_levels;
+	int passes;
+
+	for (passes = 0; ; passes++) {
+		BUG_ON(passes > 100);
+
+		if (super->s_no_free_segs >= reserve)
+			break;
+
+		scan_valid_segments(super);
+
+		if (super->s_no_free_segs >= reserve)
+			break;
+
+		dump_segments(super);
+		logfs_gc_once(super);
+		dump_segments(super);
+	}
+}
+
+
+/*
+ * Pick the first segment whose cached valid count is 0 and return its
+ * offset.  The segment is accounted as fully used (valid count
+ * s_no_blocks-1) and s_no_free_segs is decremented.
+ *
+ * BUG()s if the cached count disagrees with a fresh logfs_valid_count()
+ * or if no free segment exists.
+ */
+static u64 find_free_segment(struct logfs_super *super)
+{
+	int i;
+
+	for (i=0; i<super->s_no_segs; i++) {
+		u64 ofs = (u64)i * super->s_segsize;
+
+		if (super->s_valid_count[i] != 0)
+			continue;
+
+		/* cross-check the cache against the medium */
+		BUG_ON(logfs_valid_count(super, ofs) != 0);
+
+		super->s_valid_count[i] = super->s_no_blocks-1;
+		super->s_no_free_segs--;
+		return ofs;
+	}
+
+	BUG();
+	return 0;
+}
+
+
+/*
+ * Make sure seg is backed by an erased segment that can be written to.
+ *
+ * Nothing happens if seg is already active.  Otherwise a free segment is
+ * picked, its old summary is read to continue the erase count (old count
+ * plus one, also folded into s_maxec), the segment is erased and the
+ * in-memory block summary is reset to all ones.
+ *
+ * Returns 0 on success or the error of logfs_erase_segment().  seg is only
+ * marked active once the erase succeeded.  A failed summary read still ends
+ * in BUG_ON().
+ */
+static int logfs_open_segment(struct logfs_segment *seg)
+{
+	struct logfs_super *super = seg->super;
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_disk_sum *sum;
+	int end = super->s_no_blocks-1;
+	u64 footer;
+	int err;
+
+	TRACE();
+	if (seg->active)
+		return 0; /* nothing to do */
+
+	seg->ofs = find_free_segment(super);
+	seg->cur_block = 0;
+
+	sum = alloc_disk_sum(super);
+	footer = seg->ofs + end * super->s_blocksize;
+	err = mtdread(mtd, footer, logfs_summary_size(super), sum);
+	if (!err) {
+		/* only trust the buffer if the read worked */
+		seg->erase_count = be32_to_cpu(sum->erase_count) + 1;
+		super->s_maxec = max(super->s_maxec, (u64)seg->erase_count);
+	}
+	//printk("open %8llx, %x\n", seg->ofs, seg->erase_count);
+	free_disk_sum(sum);
+	BUG_ON(err); /* FIXME */
+	if (err)
+		return err;
+
+	/* Erase first.  Handing out blocks of a segment that could not be
+	 * erased would mean writing to dirty flash. */
+	err = logfs_erase_segment(super, seg->ofs);
+	if (err)
+		return err;
+
+	memset(&seg->blocks, 0xff, logfs_summary_size(super));
+	seg->active = 1;
+
+	return 0;
+}
+
+
+/*
+ * Convert an on-medium summary into the in-memory segment seg.
+ *
+ * A stored offset of all ones marks an inactive segment; in that case only
+ * seg->ofs and seg->active are set.  Otherwise erase count, level and all
+ * block entries are copied and cur_block is set behind the last entry whose
+ * ino is not all ones.
+ */
+void logfs_disk_to_sum(struct logfs_super *super, struct logfs_segment *seg,
+		struct logfs_disk_sum *sum)
+{
+	int end = super->s_no_blocks-1;
+	int i;
+
+	seg->ofs = be64_to_cpu(sum->ofs);
+	if (seg->ofs + 1 == 0) {	/* inactive segment */
+		seg->active = 0;
+		return;
+	}
+
+	seg->active = 1;
+	seg->erase_count = be32_to_cpu(sum->erase_count);
+	seg->level = sum->level;
+
+	seg->cur_block = 0;
+	for (i=0; i<end; i++) {
+		seg->blocks[i].ino = be64_to_cpu(sum->blocks[i].ino);
+		seg->blocks[i].pos = be64_to_pos(sum->blocks[i].pos);
+		/* an all-ones ino is an unused slot */
+		if (seg->blocks[i].ino + 1 != 0)
+			seg->cur_block = i+1;
+	}
+}
+
+
+/*
+ * Convert the in-memory segment seg into its on-medium summary.
+ *
+ * An inactive segment is written as logfs_summary_size() bytes of 0xff.
+ * For an active one the block entries, erase count, level and offset are
+ * stored and the pad bytes are set to "sum".
+ */
+void logfs_sum_to_disk(struct logfs_super *super, struct logfs_segment *seg,
+		struct logfs_disk_sum *sum)
+{
+	int end = super->s_no_blocks-1;
+	int i;
+
+	if (!seg->active) {
+		memset(sum, 0xff, logfs_summary_size(super));
+		return;
+	}
+
+	for (i=0; i<end; i++) {
+		sum->blocks[i].ino = cpu_to_be64(seg->blocks[i].ino);
+		sum->blocks[i].pos = pos_to_be64(seg->blocks[i].pos);
+	}
+	sum->erase_count = cpu_to_be32(seg->erase_count);
+	sum->level = seg->level;
+	sum->pad[0] = 's';
+	sum->pad[1] = 'u';
+	sum->pad[2] = 'm';
+	sum->ofs = cpu_to_be64(seg->ofs);
+}
+
+
+/*
+ * Write the summary of an active segment into block s_no_blocks-1 of that
+ * segment.  Does nothing for an inactive segment.
+ *
+ * Returns 0; a failed write ends in BUG_ON() before the error could be
+ * returned.
+ */
+static int logfs_finish_segment(struct logfs_super *super,
+		struct logfs_segment *seg)
+{
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_disk_sum *sum;
+	int end = super->s_no_blocks-1;
+	u64 ofs = seg->ofs + end * super->s_blocksize;
+	int err;
+
+	TRACE();
+	if (!seg->active)
+		return 0;
+
+	sum = alloc_disk_sum(super);
+	logfs_sum_to_disk(super, seg, sum);
+	sum->gec = cpu_to_be64(super->s_gec); /* we overwrite ofs here */
+	err = mtdwrite(mtd, ofs, logfs_summary_size(super), sum);
+	//printk("finish %8llx, %x\n", seg->ofs, seg->erase_count);
+	free_disk_sum(sum);
+	BUG_ON(err); /* lacking any better means */
+	return err;
+}
+
+
+/*
+ * Close seg once all of its data blocks are handed out: write the summary
+ * and mark the segment inactive.  A segment with blocks left is untouched.
+ */
+static void logfs_close_segment(struct logfs_segment *seg)
+{
+	struct logfs_super *super = seg->super;
+
+	if (seg->cur_block != super->s_no_blocks - 1)
+		return;
+
+	logfs_finish_segment(super, seg); /* FIXME: return code */
+	seg->active = 0;
+}
+
+
+/*
+ * Hand out the next free block of seg for the block (ino, pos).
+ *
+ * Opens a new segment if seg is not active, records ino and pos in the
+ * summary slot of the block, advances cur_block and closes the segment if
+ * that was its last data block.
+ *
+ * Returns the offset of the block on the medium or the negative error of
+ * logfs_open_segment().
+ */
+static s64 __logfs_get_free_block(struct logfs_segment *seg, u64 ino, pos_t pos)
+{
+	struct logfs_super *super = seg->super;
+	//struct logfs_segment *bug = super->s_segs[1];
+	s64 ofs;
+	int ret;
+
+	TRACE();
+	ret = logfs_open_segment(seg);
+	BUG_ON(ret>0);
+	if (ret)
+		return ret;
+
+	seg->blocks[seg->cur_block].ino = ino;
+	seg->blocks[seg->cur_block].pos = pos;
+
+	/* compute the offset before cur_block moves on */
+	ofs = seg->ofs + seg->cur_block * super->s_blocksize;
+	seg->cur_block++;
+	logfs_close_segment(seg);
+
+	BUG_ON(ofs >= super->s_size);
+	return ofs;
+}
+
+
+/*
+ * Return the offset of a free block for block (ino, pos) on the given
+ * level.  Blocks of LOGFS_INO_MASTER are taken from a second set of
+ * segments, indexed level + LOGFS_MAX_LEVELS.
+ *
+ * level must be below LOGFS_MAX_LEVELS.  Any result that is not positive
+ * (which includes errors) ends in BUG_ON().
+ */
+s64 logfs_get_free_block(struct logfs_super *super, int level, u64 ino, pos_t pos)
+{
+	s64 ret;
+
+	TRACE();
+	BUG_ON(level >= LOGFS_MAX_LEVELS);
+
+	if (ino == LOGFS_INO_MASTER)	/* ifile has separate segments */
+		level += LOGFS_MAX_LEVELS;
+
+	ret = __logfs_get_free_block(super->s_segs[level], ino, pos);
+	BUG_ON(ret <= 0); /* not sure, but it's safer to BUG than to accept */
+	return ret;
+}
+
+
+/*
+ * Compute the number of free blocks from the cached valid counts and store
+ * it in s_free.  Per segment one block (the summary) and all valid blocks
+ * are subtracted from s_segsize/LOGFS_BLOCKSIZE.
+ *
+ * Needs s_valid_count to be up to date, i.e. scan_valid_segments() first.
+ */
+static void logfs_measure_free_space(struct logfs_super *super)
+{
+	u64 free = 0;
+	int i;
+
+	/* FIXME: superblock also needs a "don't touch" marker */
+	for (i=0; i<super->s_no_segs; i++) {
+		int valid = super->s_valid_count[i];
+		free += super->s_segsize/LOGFS_BLOCKSIZE - 1 - valid;
+	}
+	super->s_free = free;
+	/* u64 is not unsigned long long everywhere, so cast for printk */
+	printk("free: %llu\n", (unsigned long long)free);
+}
+
+
+/*
+ * Allocate the LOGFS_SEGMENTS in-memory segment structures in one zeroed
+ * chunk and hook them into s_segs.  Segment i starts out with level i.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int logfs_init_segments(struct logfs_super *super)
+{
+	void *chunk;
+	int i;
+
+	chunk = kzalloc(LOGFS_SEGMENTS * logfs_segstruct_size(super),
+			GFP_KERNEL);
+	if (!chunk)
+		return -ENOMEM;
+
+	for (i=0; i<LOGFS_SEGMENTS; i++) {
+		struct logfs_segment *seg;
+
+		seg = chunk + i*logfs_segstruct_size(super);
+		super->s_segs[i] = seg;
+		seg->super = super;
+		seg->level = i;
+	}
+	return 0;
+}
+
+
+/*
+ * Set up the garbage collector state: compute s_no_segs as
+ * s_size / s_segsize, allocate s_valid_count (s_no_segs bytes) and s_gc_buf
+ * (one block), then scan all segments and compute the free space.
+ *
+ * Returns 0 on success or -ENOMEM.
+ *
+ * needs to be called after init_journal, as we use s_last here
+ */
+int logfs_init_gc(struct logfs_super *super)
+{
+	u64 div;
+
+	/* do_div() leaves the quotient in div */
+	div = super->s_size;
+	do_div(div, super->s_segsize);
+	super->s_no_segs = div;
+
+	super->s_valid_count = kmalloc(super->s_no_segs, GFP_KERNEL);
+	if (!super->s_valid_count)
+		goto fail0;
+
+	super->s_gc_buf = kmalloc(super->s_blocksize, GFP_KERNEL);
+	if (!super->s_gc_buf)
+		goto fail1;
+
+	/* order is important - scan first */
+	scan_valid_segments(super);
+	logfs_measure_free_space(super);
+
+	return 0;
+fail1:
+	/* NOTE(review): s_valid_count is left pointing at freed memory */
+	kfree(super->s_valid_count);
+fail0:
+	return -ENOMEM;
+}
+
+
+/*
+ * Free the in-memory segment structures.  s_segs[0] is freed, which is the
+ * start of the single chunk logfs_init_segments() allocates for all
+ * segments.  The loop is currently a no-op, the commit to flash is
+ * commented out.
+ */
+void logfs_cleanup_segments(struct logfs_super *super)
+{
+	int i;
+	for (i=0; i<LOGFS_SEGMENTS; i++) {
+		/* commit to flash */
+		//logfs_finish_segment(super, super->s_segs[i]);
+	}
+	kfree(super->s_segs[0]);
+}
+
+
+/* Free the two buffers allocated by logfs_init_gc(). */
+void logfs_cleanup_gc(struct logfs_super *super)
+{
+	kfree(super->s_gc_buf);
+	kfree(super->s_valid_count);
+}
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/inode.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,385 @@
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mtd/mtd.h>
+
+#include "logfs.h"
+
+
+static kmem_cache_t *logfs_inode_cache;
+
+
+/*
+ * alloc_inode hook: take a logfs_inode from the slab cache and return the
+ * embedded vfs inode, or NULL if the allocation failed.
+ */
+static struct inode *logfs_alloc_inode(struct super_block *sb)
+{
+	struct logfs_inode *li;
+
+	TRACE();
+	li = kmem_cache_alloc(logfs_inode_cache, SLAB_KERNEL);
+	return li ? &li->vfs_inode : NULL;
+}
+
+
+/*
+ * Give a fresh inode its defaults: flagged valid, no blocks, size 0,
+ * uid/gid 0, ctime and mtime set to now, one link and cleared embedded
+ * data.  i_mode and i_ino are left to the caller.
+ */
+static void logfs_init_inode(struct inode *inode)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	int i;
+
+	TRACE();
+	li->li_flags	= LOGFS_IF_VALID;
+	li->li_blocks	= 0;
+	inode->i_uid	= 0;
+	inode->i_gid	= 0;
+	inode->i_size	= 0;
+	inode->i_blocks	= 0;
+	inode->i_ctime	= CURRENT_TIME;
+	inode->i_mtime	= CURRENT_TIME;
+	inode->i_nlink	= 1;
+
+	for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)
+		li->li_data[i] = 0;
+
+	return;
+}
+
+
+/*
+ * Create the in-memory master inode (LOGFS_INO_MASTER, mode 0).
+ *
+ * The inode comes straight from logfs_alloc_inode(), not from new_inode(),
+ * and only the fields set here and in logfs_init_inode() are initialised.
+ *
+ * Returns the inode or ERR_PTR(-ENOMEM) - never NULL.
+ */
+struct inode *logfs_new_master_inode(struct super_block *sb)
+{
+	struct inode *inode;
+
+	inode = logfs_alloc_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	logfs_init_inode(inode);
+	inode->i_mode = 0;
+	inode->i_ino = LOGFS_INO_MASTER;
+	inode->i_sb = sb;
+
+	return inode;
+}
+
+
+/*
+ * Unpack an on-medium timestamp: seconds live in the upper 32 bits,
+ * nanoseconds in the lower 32 bits of the big-endian value.
+ */
+static struct timespec be64_to_timespec(be64 betime)
+{
+	u64 raw = be64_to_cpu(betime);
+	struct timespec ts = {
+		.tv_sec		= raw >> 32,
+		.tv_nsec	= raw & 0xffffffff,
+	};
+
+	return ts;
+}
+
+
+/*
+ * Pack a timespec into the on-medium format: seconds in the upper 32 bits,
+ * the lower 32 bits of the nanoseconds below, stored big-endian.
+ */
+static be64 timespec_to_be64(struct timespec tsp)
+{
+	u64 secs = (u64)tsp.tv_sec << 32;
+	u64 nsecs = tsp.tv_nsec & 0xffffffff;
+
+	return cpu_to_be64(secs + nsecs);
+}
+
+
+/*
+ * Fill the in-memory inode from its on-medium representation di.
+ *
+ * For character devices, block devices and fifos di_data[0] holds the
+ * device number and li_data is left alone; for every other file type all
+ * LOGFS_EMBEDDED_FIELDS data fields are copied to li_data.
+ */
+static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	int i;
+
+	TRACE();
+	inode->i_mode	= be16_to_cpu(di->di_mode);
+	li->li_flags	= be32_to_cpu(di->di_flags);
+	inode->i_uid	= be32_to_cpu(di->di_uid);
+	inode->i_gid	= be32_to_cpu(di->di_gid);
+	inode->i_size	= be64_to_cpu(di->di_size);
+	logfs_set_blocks(inode, be64_to_cpu(di->di_blocks));
+	inode->i_ctime	= be64_to_timespec(di->di_ctime);
+	inode->i_mtime	= be64_to_timespec(di->di_mtime);
+	inode->i_nlink	= be32_to_cpu(di->di_refcount);
+	inode->i_generation = be32_to_cpu(di->di_generation);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFCHR: /* fall through */
+	case S_IFBLK: /* fall through */
+	case S_IFIFO:
+		inode->i_rdev = be64_to_cpu(di->di_data[0]);
+		break;
+	default:
+		for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)
+			li->li_data[i] = be64_to_cpu(di->di_data[i]);
+		break;
+	}
+}
+
+
+/*
+ * Fill the on-medium representation di from the in-memory inode.
+ *
+ * For character devices, block devices and fifos di_data[0] takes the
+ * device number and the remaining data fields are zeroed; for every other
+ * file type all LOGFS_EMBEDDED_FIELDS fields are copied from li_data.
+ * All data fields are written in either case, so di may be an
+ * uninitialised buffer as far as di_data is concerned.
+ */
+static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	int i;
+
+	TRACE();
+	di->di_mode	= cpu_to_be16(inode->i_mode);
+	di->di_pad	= 0;
+	di->di_flags	= cpu_to_be32(li->li_flags);
+	di->di_uid	= cpu_to_be32(inode->i_uid);
+	di->di_gid	= cpu_to_be32(inode->i_gid);
+	di->di_size	= cpu_to_be64(i_size_read(inode));
+	di->di_blocks	= cpu_to_be64(li->li_blocks);
+	di->di_ctime	= timespec_to_be64(inode->i_ctime);
+	di->di_mtime	= timespec_to_be64(inode->i_mtime);
+	di->di_refcount	= cpu_to_be32(inode->i_nlink);
+	di->di_generation = cpu_to_be32(inode->i_generation);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFCHR: /* fall through */
+	case S_IFBLK: /* fall through */
+	case S_IFIFO:
+		di->di_data[0] = cpu_to_be64(inode->i_rdev);
+		/* Callers pass stack buffers and both memcmp() and write
+		 * the result.  Don't leave stack garbage in there. */
+		for (i=1; i<LOGFS_EMBEDDED_FIELDS; i++)
+			di->di_data[i] = 0;
+		break;
+	default:
+		for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)
+			di->di_data[i] = cpu_to_be64(li->li_data[i]);
+		break;
+	}
+}
+
+
+/*
+ * Debug helper: hexdump an on-medium inode, eight 32bit words per line.
+ * Currently disabled by the unconditional return at the top.
+ */
+static void dump_di(struct logfs_disk_inode *di)
+{
+	int i, k;
+	return;
+	for (i=0; i<sizeof(struct logfs_disk_inode); i+=32) {
+		for (k=0; k<32; k+=4) {
+			be32 *x = ((void*)di) + i + k;
+			printk("%08x ", be32_to_cpu(*x));
+		}
+		printk("\n");
+	}
+}
+
+
+/*
+ * Read the on-medium inode belonging to inode into di.  It is read from
+ * the master inode at offset i_ino * sizeof(*di).
+ *
+ * Returns 0 on success.  -EIO is returned if the read failed, if
+ * LOGFS_IF_VALID is not set or if LOGFS_IF_INVALID is set in di_flags.
+ * The master inode must exist (BUG_ON otherwise).
+ */
+static int logfs_read_disk_inode(struct logfs_disk_inode *di,
+		struct inode *inode)
+{
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	ino_t ino = inode->i_ino;
+	int ret;
+
+	TRACE();
+
+	BUG_ON(!super->s_master_inode);
+	ret = logfs_inode_read(super->s_master_inode, di, sizeof(*di),
+			ino * sizeof(*di));
+	dump_di(di);
+	if (ret)
+		return -EIO;
+
+	if ( !(be32_to_cpu(di->di_flags) & LOGFS_IF_VALID))
+		return -EIO;
+
+	if (be32_to_cpu(di->di_flags) & LOGFS_IF_INVALID)
+		return -EIO;
+
+	return 0;
+}
+
+
+/*
+ * Read inode from the medium and set up its operations.
+ *
+ * Directories get the dir inode and file operations, regular files get the
+ * regular inode, file and address space operations.  Other file types keep
+ * whatever operations they had.
+ *
+ * Returns 0 on success, the error of logfs_read_disk_inode() or -EIO if the
+ * flags read do not describe a valid inode.
+ */
+static int __logfs_read_inode(struct inode *inode)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_disk_inode di;
+	int ret;
+
+	TRACE();
+	ret = logfs_read_disk_inode(&di, inode);
+	if (ret)
+		return ret;
+	logfs_disk_to_inode(&di, inode);
+
+	/* same check logfs_read_disk_inode() did on the raw flags */
+	if ( !(li->li_flags&LOGFS_IF_VALID) || (li->li_flags&LOGFS_IF_INVALID))
+		return -EIO;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFDIR:
+		inode->i_op = &logfs_dir_iops;
+		inode->i_fop = &logfs_dir_fops;
+		break;
+	case S_IFREG:
+		inode->i_op = &logfs_reg_iops;
+		inode->i_fop = &logfs_reg_fops;
+		inode->i_mapping->a_ops = &logfs_reg_aops;
+		break;
+	default:
+		;
+	}
+
+	return 0;
+}
+
+
+/*
+ * read_inode hook.  Must not be used for the master inode.  A failure to
+ * read the inode prints the inode number and ends in BUG().
+ */
+static void logfs_read_inode(struct inode *inode)
+{
+	int ret;
+
+	TRACE();
+	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);
+
+	ret = __logfs_read_inode(inode);
+	if (ret) {
+		printk("%lx\n", inode->i_ino);
+		BUG();
+	}
+}
+
+
+/*
+ * Write the on-medium inode di for inode.  It is written into the master
+ * inode at offset i_ino * sizeof(*di) using the _nolock write variant.
+ *
+ * Returns the result of logfs_inode_write_nolock().  The master inode must
+ * exist (BUG_ON otherwise).
+ */
+static int logfs_write_disk_inode(struct logfs_disk_inode *di,
+		struct inode *inode)
+{
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	int ret;
+
+	TRACE();
+	BUG_ON(!super->s_master_inode);
+	ret = logfs_inode_write_nolock(super->s_master_inode, di, sizeof(*di),
+			inode->i_ino * sizeof(*di));
+	dump_di(di);
+	return ret;
+}
+
+
+/*
+ * write_inode hook: store inode on the medium unless the copy found there
+ * is already identical.  do_sync is ignored.
+ *
+ * While the write is in progress the inode is published in s_write_inode,
+ * under s_write_inode_mutex.  Must not be used for the master inode.
+ *
+ * Returns 0 if nothing had to be written, otherwise the result of
+ * logfs_write_disk_inode().
+ */
+static int logfs_write_inode(struct inode *inode, int do_sync)
+{
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct logfs_disk_inode old, new;
+	int ret = 0;
+
+	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);
+
+	mutex_lock(&super->s_write_inode_mutex);
+	super->s_write_inode = inode;
+
+	logfs_inode_to_disk(inode, &new);
+	/* Write exactly once: either there is no usable old copy - then
+	 * "old" must not be looked at - or the old copy differs. */
+	if (logfs_read_disk_inode(&old, inode)
+			|| memcmp(&old, &new, sizeof(old)))
+		ret = logfs_write_disk_inode(&new, inode);
+
+	super->s_write_inode = NULL;
+	mutex_unlock(&super->s_write_inode_mutex);
+	return ret;
+}
+
+
+/*
+ * delete_inode hook: truncate a non-empty inode to size 0, drop its pages,
+ * write the inode back and clear it.  The result of the final write is
+ * ignored.
+ */
+static void logfs_delete_inode(struct inode *inode)
+{
+	TRACE();
+	if (i_size_read(inode) > 0) {
+		i_size_write(inode, 0);
+		logfs_truncate(inode);
+		truncate_inode_pages(&inode->i_data, 0);
+	}
+	logfs_write_inode(inode, 1);
+	clear_inode(inode);
+}
+
+
+/* destroy_inode hook: give the containing logfs_inode back to the cache. */
+static void logfs_destroy_inode(struct inode *inode)
+{
+	TRACE();
+	kmem_cache_free(logfs_inode_cache, LOGFS_INODE(inode));
+}
+
+
+/*
+ * Allocate an inode number: return the current s_last_ino and advance the
+ * counter, both under s_ino_lock.
+ */
+static u64 logfs_get_ino(struct super_block *sb)
+{
+	struct logfs_super *super = LOGFS_SUPER(sb);
+	u64 ino;
+
+	/* FIXME: ino allocation should work in two modes:
+	 * o nonsparse - ifile is mostly occupied, just append
+	 * o sparse - ifile has lots of holes, fill them up
+	 */
+	spin_lock(&super->s_ino_lock);
+	/* ifile shouldn't be too sparse */
+	ino = super->s_last_ino++;
+	spin_unlock(&super->s_ino_lock);
+
+	return ino;
+}
+
+
+/*
+ * Create a new inode with the given mode on the filesystem of dir, write
+ * it to the medium right away and insert it into the inode hash.
+ *
+ * Returns the inode, ERR_PTR(-ENOMEM) if no inode could be allocated, or
+ * NULL if writing the inode failed.
+ *
+ * NOTE(review): the two failure paths use different conventions (ERR_PTR
+ * vs. NULL); callers have to check for both.
+ * NOTE(review): the write-failure path hands an inode obtained from
+ * new_inode() directly to logfs_destroy_inode() - confirm this is a valid
+ * way to dispose of it.
+ */
+struct inode *logfs_new_inode(struct inode *dir, int mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	int ret;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	logfs_init_inode(inode);
+
+	inode->i_mode = mode;
+	inode->i_ino = logfs_get_ino(sb);
+
+	/* If we don't spend an option early, we may end up with dirty inodes
+	 * and no options left at umount time.  So write this inode NOW.
+	 */
+	ret = logfs_write_inode(inode, 1);
+	if (ret) {
+		logfs_destroy_inode(inode);
+		/* FIXME: we just grew super->s_last_ino */
+		return NULL;
+	}
+	/* FIXME: need to understand vfs inode handling a bit more */
+	insert_inode_hash(inode);
+
+	return inode;
+}
+
+
+/*
+ * Slab constructor for logfs inodes.  On real construction (not on
+ * verification) the logfs specific fields are cleared and the embedded vfs
+ * inode gets its one-time initialisation.
+ */
+static void logfs_init_once(void *_li, kmem_cache_t *cachep,
+		unsigned long flags)
+{
+	struct logfs_inode *li = _li;
+	int i;
+
+	TRACE();
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+			SLAB_CTOR_CONSTRUCTOR) {
+		li->li_flags = 0;
+		li->li_blocks = 0;
+		for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)
+			li->li_data[i] = 0;
+		inode_init_once(&li->vfs_inode);
+		/* Go through the member instead of casting the object, so
+		 * this keeps working wherever vfs_inode sits in the struct.
+		 * FIXME: would block size be better? */
+		li->vfs_inode.i_blksize = PAGE_SIZE;
+	}
+
+}
+
+
+/* Superblock operations; logfs_statfs is defined outside this file. */
+struct super_operations logfs_super_operations = {
+	.alloc_inode	= logfs_alloc_inode,
+	.delete_inode	= logfs_delete_inode,
+	.destroy_inode	= logfs_destroy_inode,
+	.read_inode	= logfs_read_inode,
+	.write_inode	= logfs_write_inode,
+	.statfs		= logfs_statfs,
+};
+
+
+/*
+ * Create the slab cache for logfs inodes, constructed by
+ * logfs_init_once().  Returns 0 on success or -ENOMEM.
+ */
+int logfs_init_inode_cache(void)
+{
+	TRACE();
+	logfs_inode_cache = kmem_cache_create("logfs_inode_cache",
+			sizeof(struct logfs_inode), 0, SLAB_RECLAIM_ACCOUNT,
+			logfs_init_once, NULL);
+
+	return logfs_inode_cache ? 0 : -ENOMEM;
+}
+
+
+/* Destroy the slab cache created by logfs_init_inode_cache(). */
+void logfs_destroy_inode_cache(void)
+{
+	TRACE();
+	kmem_cache_destroy(logfs_inode_cache);
+}
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/journal.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,216 @@
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/mtd/mtd.h>
+
+#include "logfs.h"
+
+
+/**
+ * FIXME: this code assumes that journal entries all fit into a block.
+ * Might become untrue someday.
+ */
+
+
+/* Return the offset of ofs within its segment, i.e. ofs % s_segsize. */
+static inline u32 seg_offset(struct logfs_super *super, u64 ofs)
+{
+	u64 quot = ofs;
+
+	/* do_div() leaves the quotient in quot and returns the remainder */
+	return do_div(quot, super->s_segsize);
+}
+
+
+/*
+ * Compute where the next journal entry goes: one block behind the current
+ * anchor, wrapping to the start of the journal area at its end.  If that
+ * position is the first block of a segment, the segment is erased first.
+ *
+ * Returns the offset or -EIO if the erase failed.
+ */
+static s64 __logfs_get_free_entry(struct logfs_super *super)
+{
+	u64 ofs;
+
+	ofs = super->s_anchor_ofs + super->s_blocksize;
+	if (ofs - super->s_log_ofs >= super->s_log_len)
+		ofs = super->s_log_ofs;
+	/* a zero offset within the segment means we enter a new segment */
+	if ( !seg_offset(super, ofs)) {
+		if (logfs_erase_segment(super, ofs))
+			return -EIO;
+	}
+	BUG_ON((s64)ofs < 0);
+	return ofs;
+}
+
+
+/**
+ * logfs_get_free_entry - return free space for journal entry
+ *
+ * Locked wrapper around __logfs_get_free_entry(), serialised by s_log_sem.
+ * Returns the offset for the next journal entry.  Any result that is not
+ * positive (which includes errors) ends in BUG_ON().
+ */
+static s64 logfs_get_free_entry(struct logfs_super *super)
+{
+	s64 ret;
+
+	TRACE();
+	mutex_lock(&super->s_log_sem);
+	ret = __logfs_get_free_entry(super);
+	mutex_unlock(&super->s_log_sem);
+	BUG_ON(ret <= 0); /* not sure, but it's safer to BUG than to accept */
+	return ret;
+}
+
+
+/*
+ * Find the most recent journal entry.
+ *
+ * Walks the journal area block by block, reading sizeof(*je) bytes of each
+ * block into je.  Entries with an all-ones version are skipped; of the
+ * rest, the one with the highest version wins.
+ *
+ * Returns the offset of that entry.  -EIO is returned on a read error, if
+ * an entry carries the same version as the highest seen so far (that
+ * includes a version of 0 before anything else was found), or if no entry
+ * was found at all.  On return je holds the header of the last block read,
+ * not necessarily that of the winner.
+ */
+static s64 logfs_find_anchor(struct logfs_super *super,
+		struct logfs_journal_entry *je)
+{
+	struct mtd_info *mtd = super->s_mtd;
+	u64 ofs, version, last_ofs = -EIO, last_version = 0;
+	int ret;
+
+	for (ofs = super->s_log_ofs; ofs - super->s_log_ofs < super->s_log_len;
+			ofs += super->s_blocksize) {
+		ret = mtdread(mtd, ofs, sizeof(*je), je);
+		if (ret)
+			return -EIO;
+		version = be64_to_cpu(je->je_version);
+		if (version == -1ULL)
+			continue;
+		if (version == last_version)
+			return -EIO;
+		if (version < last_version)
+			continue;
+		last_version = version;
+		last_ofs = ofs;
+	}
+	/* still -EIO if nothing was found; turns negative as s64 */
+	return last_ofs;
+}
+
+
+/*
+ * Load all LOGFS_SEGMENTS segment summaries from the journal entry buffer
+ * s_je.  They are stored back to back, starting s_sum_start bytes into
+ * the buffer.
+ */
+static void logfs_load_sums(struct logfs_super *super)
+{
+	void *sums = (void*)super->s_je + super->s_sum_start;
+	int i;
+
+	for (i=0; i<LOGFS_SEGMENTS; i++)
+		logfs_disk_to_sum(super, super->s_segs[i],
+				sums + i * logfs_summary_size(super));
+}
+
+
+/*
+ * Locate and read the most recent anchor and restore the state kept in it:
+ * the segment summaries, journal version, last inode number, erase
+ * counters, sweeper position and the size and data fields of the master
+ * inode passed in as inode.
+ *
+ * Returns 0 on success, the error of logfs_find_anchor() or of the read.
+ * An entry that is not of type JE_ANCHOR ends in BUG_ON() - after the
+ * summaries were already loaded from it.
+ */
+static int logfs_read_anchor(struct inode *inode)
+{
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_journal_entry *je = super->s_je;
+	int i, ret;
+	s64 ofs;
+
+	TRACE();
+	ofs = logfs_find_anchor(super, je);
+	if (ofs < 0)
+		return ofs;
+
+	printk("%x\n", super->s_anchor_size);
+	/* now read the complete entry, not just its header */
+	ret = mtdread(mtd, ofs, super->s_anchor_size, je);
+	if (ret)
+		return ret;
+
+	logfs_load_sums(super);
+
+	BUG_ON(je->je_type != cpu_to_be16(JE_ANCHOR));
+	super->s_last_version = be64_to_cpu(je->je_version);
+	super->s_last_ino = be64_to_cpu(je->da.da_last_ino);
+	super->s_gec	= be64_to_cpu(je->da.da_gec);
+	super->s_maxec	= be64_to_cpu(je->da.da_maxec);
+	super->s_sweeper= be64_to_cpu(je->da.da_sweeper);
+	super->s_anchor_ofs = ofs;
+	li->li_flags	= LOGFS_IF_VALID;
+	i_size_write(inode, be64_to_cpu(je->da.da_size));
+
+	for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)
+		li->li_data[i] = be64_to_cpu(je->da.da_data[i]);
+
+	return 0;
+}
+
+
+/*
+ * Store all LOGFS_SEGMENTS segment summaries into the journal entry buffer
+ * s_je, back to back, starting s_sum_start bytes into the buffer.
+ */
+static void logfs_save_sums(struct logfs_super *super)
+{
+	void *sums = (void*)super->s_je + super->s_sum_start;
+	int i;
+
+	for (i=0; i<LOGFS_SEGMENTS; i++)
+		logfs_sum_to_disk(super, super->s_segs[i],
+				sums + i * logfs_summary_size(super));
+}
+
+
+/*
+ * Write a new anchor to the journal.  inode is the master inode.
+ *
+ * The anchor carries an incremented version, the erase counters, sweeper
+ * position, last inode number, size and data fields of the master inode
+ * and the summaries of all segments.
+ *
+ * Returns 0 on success or the error of mtdwrite().  s_last_version is
+ * incremented even if the write fails; s_anchor_ofs only moves on success.
+ */
+int logfs_write_anchor(struct inode *inode)
+{
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_journal_entry *je = super->s_je;
+	u64 ofs;
+	int i, ret;
+
+	TRACE();
+	ofs = logfs_get_free_entry(super);
+	BUG_ON(ofs >= super->s_size);
+
+	memset(je, 0, super->s_anchor_size);
+	je->je_type = cpu_to_be16(JE_ANCHOR);
+	super->s_last_version++;
+	je->je_version	= cpu_to_be64(super->s_last_version);
+	je->da.da_gec	= cpu_to_be64(super->s_gec);
+	je->da.da_maxec	= cpu_to_be64(super->s_maxec);
+	je->da.da_sweeper = cpu_to_be64(super->s_sweeper);
+	je->da.da_last_ino	= cpu_to_be64(super->s_last_ino);
+	je->da.da_size	= cpu_to_be64(i_size_read(inode));
+	for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)
+		je->da.da_data[i] = cpu_to_be64(li->li_data[i]);
+
+	logfs_save_sums(super);
+
+	ret = mtdwrite(mtd, ofs, super->s_anchor_size, je);
+	if (ret)
+		return ret;
+	super->s_anchor_ofs = ofs;
+	return 0;
+}
+
+
+/*
+ * Set up the journal: allocate the journal entry buffer s_je, create the
+ * master inode and fill both from the most recent anchor on the medium.
+ *
+ * Returns 0 on success.  On failure a negative error is returned, s_je is
+ * freed and reset to NULL and s_master_inode is left untouched.
+ */
+int logfs_init_log(struct super_block *sb)
+{
+	struct logfs_super *super = LOGFS_SUPER(sb);
+	struct inode *inode;
+	int ret;
+
+	TRACE();
+	mutex_init(&super->s_log_sem);
+
+	super->s_je = kzalloc(super->s_anchor_size, GFP_KERNEL);
+	if (!super->s_je)
+		return -ENOMEM;
+
+	/* logfs_new_master_inode() reports failure as ERR_PTR, not NULL */
+	inode = logfs_new_master_inode(sb);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		goto fail_je;
+	}
+
+	ret = logfs_read_anchor(inode);
+	if (ret)
+		goto fail_inode;
+	inode->i_nlink = 1; /* lock it in ram (FIXME: remove) */
+
+	super->s_master_inode = inode;
+	return 0;
+
+fail_inode:
+	/* Same way logfs_cleanup_log() gets rid of the master inode.  If
+	 * the superblock has no destroy_inode yet, the inode is leaked. */
+	if (sb->s_op && sb->s_op->destroy_inode)
+		sb->s_op->destroy_inode(inode);
+fail_je:
+	kfree(super->s_je);
+	super->s_je = NULL;
+	return ret;
+}
+
+
+/*
+ * Tear down the journal: write a final anchor, destroy the master inode
+ * through the superblock's destroy_inode and free the journal entry
+ * buffer.  The result of the anchor write is ignored.
+ */
+void logfs_cleanup_log(struct super_block *sb)
+{
+	struct logfs_super *super = LOGFS_SUPER(sb);
+
+	logfs_write_anchor(super->s_master_inode);
+	sb->s_op->destroy_inode(super->s_master_inode);
+	super->s_master_inode = NULL;
+
+	kfree(super->s_je);
+}
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/readwrite.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,992 @@
+#include <linux/fs.h>
+#include <linux/mtd/mtd.h>
+#include <linux/pagemap.h>
+
+#include "logfs.h"
+
+
+/* "Read" a hole: fill PAGE_CACHE_SIZE bytes at buf with zeroes.  Returns 0. */
+static int logfs_read_empty(void *buf)
+{
+	TRACE();
+	memset(buf, 0, PAGE_CACHE_SIZE);
+	return 0;
+}
+
+
+/*
+ * Read the data embedded in the inode: copy LOGFS_EMBEDDED_SIZE bytes of
+ * li_data to buf and zero the rest of the block, so that no stale buffer
+ * content is handed out behind the embedded data.  Returns 0.
+ *
+ * buf is taken to be PAGE_CACHE_SIZE bytes, as in logfs_read_empty().
+ */
+static int logfs_read_embedded(struct inode *inode, void *buf)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+
+	TRACE();
+	memcpy(buf, li->li_data, LOGFS_EMBEDDED_SIZE);
+	memset(buf + LOGFS_EMBEDDED_SIZE, 0,
+			PAGE_CACHE_SIZE - LOGFS_EMBEDDED_SIZE);
+	return 0;
+}
+
+
+/*
+ * Read block index of inode through its direct pointer li_data[index].
+ * A zero pointer is a hole and reads as zeroes.  Returns 0 or the error of
+ * mtdread().
+ */
+static int logfs_read_direct(struct inode *inode, pgoff_t index, void *buf)
+{
+	u64 ofs;
+
+	TRACE();
+	ofs = LOGFS_INODE(inode)->li_data[index];
+	if (ofs)
+		return mtdread(LOGFS_SUPER(inode->i_sb)->s_mtd, ofs,
+				LOGFS_BLOCKSIZE, buf);
+
+	return logfs_read_empty(buf);
+}
+
+
+/*
+ * Take s_r_sem and return the shared read buffer s_rblock.  Must be paired
+ * with logfs_put_rblock().
+ */
+static be64 *logfs_get_rblock(struct logfs_super *super)
+{
+	TRACE();
+	mutex_lock(&super->s_r_sem);
+	return super->s_rblock;
+}
+
+
+/* Release the read buffer taken with logfs_get_rblock(). */
+static void logfs_put_rblock(struct logfs_super *super)
+{
+	mutex_unlock(&super->s_r_sem);
+}
+
+
+/*
+ * Take s_w_sem, run a garbage collection pass and return the shared write
+ * buffers s_wblock.  Must be paired with logfs_put_wblocks().
+ */
+static be64 **logfs_get_wblocks(struct logfs_super *super)
+{
+	TRACE();
+	mutex_lock(&super->s_w_sem);
+	logfs_gc_pass(super);
+	return super->s_wblock;
+}
+
+
+/* Release the write buffers taken with logfs_get_wblocks(). */
+static void logfs_put_wblocks(struct logfs_super *super)
+{
+	TRACE();
+	mutex_unlock(&super->s_w_sem);
+}
+
+
+/*
+ * Extract "no" bits of val, starting "skip" bits above the least
+ * significant bit.  BUG()s if the result does not fit an unsigned long.
+ *
+ * no must be in 1..64 and skip below 64; other values shift by 64 bits or
+ * more, which is undefined.
+ */
+static unsigned long get_bits(u64 val, int skip, int no)
+{
+	u64 ret = val;
+	ret >>= skip;
+	/* shift up and down again to clear everything above "no" bits */
+	ret <<= 64 - no;
+	ret >>= 64 - no;
+	BUG_ON((unsigned long)ret != ret);
+	return ret;
+}
+
+
+/*
+ * Read block index of inode through indirect blocks.
+ *
+ * The walk starts at the pointer in li_data[I1_INDEX + count] and reads
+ * count + 1 indirect blocks.  In each of them the next pointer is selected
+ * by LOGFS_BLOCK_BITS bits of index, taken from bit position
+ * count * LOGFS_BLOCK_BITS downwards.  The final pointer is the data block.
+ * A zero pointer anywhere on the way is a hole and reads as zeroes.
+ *
+ * Returns 0 or the error of mtdread().
+ */
+static int logfs_read_loop(struct inode *inode, pgoff_t index, void *buf,
+		int count)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	be64 *rblock;
+	u64 bofs = li->li_data[I1_INDEX + count];
+	int bits = LOGFS_BLOCK_BITS;
+	int i, ret;
+
+	TRACE();
+
+	if (!bofs)
+		return logfs_read_empty(buf);
+
+	/* the shared read buffer holds one indirect block at a time */
+	rblock = logfs_get_rblock(super);
+
+	for (i=count*bits; i>=0; i-=bits) {
+		ret = mtdread(mtd, bofs, LOGFS_BLOCKSIZE, rblock);
+		if (ret)
+			goto out;
+		bofs = be64_to_cpu(rblock[get_bits(index, i, bits)]);
+
+		if (!bofs) {
+			ret = logfs_read_empty(buf);
+			goto out;
+		}
+	}
+
+	ret = mtdread(mtd, bofs, LOGFS_BLOCKSIZE, buf);
+out:
+	logfs_put_rblock(super);
+	return ret;
+}
+
+
+/*
+ * Read block index of inode into buf.
+ *
+ * Inodes with embedded data only have block 0; every other block reads as
+ * zeroes.  Otherwise the block is reached through the direct pointers or
+ * through one, two or three levels of indirect blocks, depending on index.
+ *
+ * Returns 0 or a read error.  An index of I3_BLOCKS or more ends in BUG().
+ */
+static int logfs_read_block(struct inode *inode, pgoff_t index, void *buf)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+
+	TRACE();
+	if (li->li_flags & LOGFS_IF_EMBEDDED) {
+		if (index != 0)
+			return logfs_read_empty(buf);
+		else
+			return logfs_read_embedded(inode, buf);
+	} else if (index < I0_BLOCKS)
+		return logfs_read_direct(inode, index, buf);
+	else if (index < I1_BLOCKS)
+		return logfs_read_loop(inode, index, buf, 0);
+	else if (index < I2_BLOCKS)
+		return logfs_read_loop(inode, index, buf, 1);
+	else if (index < I3_BLOCKS)
+		return logfs_read_loop(inode, index, buf, 2);
+
+	BUG();
+	return -EIO;
+}
+
+
+/* Return nonzero if the direct pointer for block index points to ofs. */
+static int logfs_is_valid_direct(struct logfs_inode *li, pgoff_t index, u64 ofs)
+{
+	TRACE();
+	return li->li_data[index] == ofs;
+}
+
+
+/*
+ * Check whether ofs is referenced on the indirect path to block index.
+ *
+ * Walks the same path as logfs_read_loop() and returns 1 as soon as a
+ * pointer read from one of the indirect blocks equals ofs.  The root
+ * pointer in li_data itself is not compared.
+ *
+ * Returns 0 if no such pointer is found, on a hole and on read errors.
+ */
+static int logfs_is_valid_loop(struct inode *inode, pgoff_t index,
+		int count, u64 ofs)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	be64 *rblock;
+	u64 bofs = li->li_data[I1_INDEX + count];
+	int bits = LOGFS_BLOCK_BITS;
+	int i, ret;
+
+	TRACE();
+
+	if (!bofs)
+		return 0;
+
+	rblock = logfs_get_rblock(super);
+
+	for (i=count*bits; i>=0; i-=bits) {
+		ret = mtdread(mtd, bofs, LOGFS_BLOCKSIZE, rblock);
+		if (ret)
+			goto fail;
+
+		bofs = be64_to_cpu(rblock[get_bits(index, i, bits)]);
+		if (!bofs)
+			goto fail;
+
+		if (bofs == ofs) {
+			ret = 1;
+			goto out;
+		}
+	}
+
+fail:
+	ret = 0;
+out:
+	logfs_put_rblock(super);
+	return ret;
+}
+
+
+/*
+ * Return nonzero if the block at ofs is still referenced by inode for
+ * block index.
+ *
+ * Nothing is valid for an inode without links that is only held by one
+ * reference (presumably the caller's), nor for an inode with embedded
+ * data.  An index of I3_BLOCKS or more ends in BUG().
+ */
+static int __logfs_is_valid_block(struct inode *inode, pgoff_t index, u64 ofs)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+
+	//printk("%lx, %x, %x\n", inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count));
+	if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1)
+		return 0;
+
+	if (li->li_flags & LOGFS_IF_EMBEDDED)
+		return 0;
+
+	if (index < I0_BLOCKS)
+		return logfs_is_valid_direct(li, index, ofs);
+	else if (index < I1_BLOCKS)
+		return logfs_is_valid_loop(inode, index, 0, ofs);
+	else if (index < I2_BLOCKS)
+		return logfs_is_valid_loop(inode, index, 1, ofs);
+	else if (index < I3_BLOCKS)
+		return logfs_is_valid_loop(inode, index, 2, ofs);
+
+	BUG();
+	return 0;
+}
+
+
+/*
+ * Return nonzero if the block at ofs still belongs to inode number ino at
+ * position pos.
+ *
+ * While an inode is being written (s_write_inode set) that inode is used
+ * for the check instead of looking up ino.
+ * NOTE(review): this happens without comparing its number to ino - confirm
+ * that is intended.
+ *
+ * Returns 0 if the inode cannot be obtained.  ino must fit an unsigned
+ * long (BUG_ON otherwise).
+ */
+int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 pos)
+{
+	struct inode *write_inode = LOGFS_SUPER(sb)->s_write_inode;
+	struct inode *inode;
+	u64 idx;
+	int ret;
+
+	TRACE();
+	idx = logfs_index(pos);
+
+	BUG_ON((u64)(u_long)ino != ino);
+	/* gcc extension: "a ? : b" is a if a is nonzero, else b */
+	inode = write_inode ? : logfs_iget(sb, ino);
+	if (!inode)
+		return 0;
+	ret = __logfs_is_valid_block(inode, idx, ofs);
+	/* only drop the reference we took ourselves */
+	if (!write_inode)
+		logfs_iput(inode);
+	return ret;
+}
+
+
+/*
+ * readpage hook: read the block backing page into it.
+ *
+ * On success the page is marked uptodate and its error flag cleared, on
+ * failure the other way round.  The page is unlocked in both cases.
+ * Returns the result of logfs_read_block().
+ */
+int logfs_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	void *buf;
+	int ret = -EIO;
+
+	TRACE();
+	buf = kmap(page);
+	ret = logfs_read_block(inode, page->index, buf);
+	kunmap(page);
+
+	if (ret) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		ClearPageError(page);
+	}
+	flush_dcache_page(page);
+
+	unlock_page(page);
+	return ret;
+}
+
+
+/**
+ * __logfs_inode_read - generic_file_read for in-kernel buffers
+ *
+ * Copies up to count bytes at *ppos of inode to buf and advances *ppos.
+ * The range is cut off at the file size and must not cross a block
+ * boundary (BUG_ON otherwise).
+ *
+ * Returns the number of bytes copied, 0 at or behind the end of the file,
+ * -ENOMEM or the error of logfs_read_block().
+ */
+static ssize_t __logfs_inode_read(struct inode *inode, char *buf, size_t count,
+		loff_t *ppos)
+{
+	void *block_data = NULL;
+	loff_t size = i_size_read(inode);
+	int err = -ENOMEM;
+
+	TRACE();
+	pr_debug("read from %lld, count %zd\n", *ppos, count);
+
+	if (*ppos >= size)
+		return 0;
+	if (count > size - *ppos)
+		count = size - *ppos;
+
+	BUG_ON(logfs_index(*ppos) != logfs_index(*ppos + count - 1));
+
+	/* bounce buffer: the block is read as a whole, then copied out */
+	block_data = kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
+	if (!block_data)
+		goto fail;
+
+	err = logfs_read_block(inode, logfs_index(*ppos), block_data);
+	if (err)
+		goto fail;
+
+	memcpy(buf, block_data + (*ppos % LOGFS_BLOCKSIZE), count);
+	*ppos += count;
+	kfree(block_data);
+	return count;
+fail:
+	kfree(block_data);
+	return err;
+}
+
+
+/*
+ * Get a free block for (@inode, @pos) at @level from
+ * logfs_get_free_block() and write PAGE_CACHE_SIZE bytes of @buf to it.
+ *
+ * Returns the flash offset of the block just written.  Both failure cases
+ * currently hit BUG(); the error returns behind them only matter once
+ * BUG() is compiled out or replaced.
+ */
+static s64 __logfs_write_block(struct logfs_super *super, void *buf, int level,
+		u64 inode, u64 pos)
+{
+	struct mtd_info *mtd = super->s_mtd;
+	/*
+	 * Signed on purpose: with an unsigned type the "block < 0" test
+	 * below could never be true and an allocator error would be used
+	 * as a flash offset.
+	 */
+	s64 block;
+	int err;
+
+	TRACE();
+	block = logfs_get_free_block(super, level, inode, (__force pos_t) pos);
+	if (block < 0) {
+		BUG();
+		return block;
+	}
+
+	err = mtdwrite(mtd, block, PAGE_CACHE_SIZE, buf);
+	if (err) {
+		BUG();
+		return err;
+	}
+	return block;
+}
+
+
+/*
+ * Convenience wrapper: turn block @index into a byte position
+ * (index * s_blocksize) and hand off to __logfs_write_block().
+ */
+static s64 logfs_write_block(struct logfs_super *super, void *buf, int level,
+		u64 inode, pgoff_t index)
+{
+	return __logfs_write_block(super, buf, level, inode,
+			(u64)index * super->s_blocksize);
+}
+
+
+/*
+ * Mirror li_blocks into the VFS inode: i_blocks becomes li_blocks << 3,
+ * or ULONG_MAX when the shifted value is not below ULONG_MAX.
+ */
+static void __logfs_set_blocks(struct inode *inode)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+
+	if (li->li_blocks<<3 < ULONG_MAX)
+		inode->i_blocks = li->li_blocks<<3;
+	else
+		inode->i_blocks = ULONG_MAX;
+}
+
+
+/* Set the inode's block count to @no and refresh i_blocks from it. */
+void logfs_set_blocks(struct inode *inode, u64 no)
+{
+	LOGFS_INODE(inode)->li_blocks = no;
+	__logfs_set_blocks(inode);
+}
+
+
+/* Raise the inode's block count by @no and refresh i_blocks. */
+static void logfs_add_blocks(struct inode *inode, u64 no)
+{
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+
+	/* the sum must not wrap around */
+	BUG_ON(linode->li_blocks + no < no);
+
+	linode->li_blocks += no;
+	__logfs_set_blocks(inode);
+}
+
+
+/*
+ * Lower the inode's block count by @no and credit the same amount to the
+ * superblock's s_free counter.  @ofs is not used.
+ */
+static void logfs_remove_blocks(struct inode *inode, u64 no, u64 ofs)
+{
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+
+	/* cannot give back more blocks than the inode owns */
+	BUG_ON(linode->li_blocks < no);
+
+	fs->s_free += no;
+	linode->li_blocks -= no;
+	__logfs_set_blocks(inode);
+}
+
+
+/*
+ * Account for @no newly used blocks on behalf of @inode.
+ *
+ * Fails with -ENOSPC when s_free is below @no + s_gc_reserve.  On success
+ * s_free drops by @no and the inode's block count grows by @no.
+ *
+ * FIXME (original author): s_free needs to be decremented as well
+ * NOTE(review): the in-memory s_free is decremented below; confirm what
+ * the FIXME still refers to.
+ */
+static int logfs_alloc_blocks(struct inode *inode, int no)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+
+	if (no == 0)
+		return 0;
+
+	//printk("%3llx %2x %2llx\n", super->s_free, no, super->s_gc_reserve);
+	if (fs->s_free < no + fs->s_gc_reserve)
+		return -ENOSPC;
+
+	fs->s_free -= no;
+	logfs_add_blocks(inode, no);
+	return 0;
+}
+
+
+/*
+ * Record that @inode changed.  The master inode (LOGFS_INO_MASTER) is
+ * written out through logfs_write_anchor() and its result returned; every
+ * other inode is marked dirty and 0 is returned.
+ */
+static int logfs_dirty_inode(struct inode *inode)
+{
+	TRACE();
+	if (inode->i_ino != LOGFS_INO_MASTER) {
+		mark_inode_dirty(inode);
+		return 0;
+	}
+
+	return logfs_write_anchor(inode);
+}
+
+
+/*
+ * File is too large for embedded data when called.  Move data to first
+ * block and clear embedded area.
+ *
+ * Does nothing (returns 0) if the inode is not flagged LOGFS_IF_EMBEDDED.
+ * Otherwise one block is accounted for, the embedded bytes are written out
+ * as block 0 using wblocks[0] as scratch space, li_data[0] is pointed at
+ * the new block and the remaining li_data fields are cleared.
+ *
+ * Returns -ENOSPC if no block can be accounted, a negative value from
+ * logfs_write_block(), or the result of logfs_dirty_inode().
+ */
+static int logfs_move_embedded(struct inode *inode, be64 **wblocks)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	void *buf;
+	s64 block;
+	int i;
+
+	TRACE();
+	if (! (li->li_flags & LOGFS_IF_EMBEDDED))
+		return 0;
+
+	if (logfs_alloc_blocks(inode, 1))
+		return -ENOSPC;
+
+	buf = wblocks[0];
+
+	/*
+	 * The scratch buffer still holds whatever the previous user left in
+	 * it.  A whole block is written below, so clear it first; otherwise
+	 * stale bytes land in the file beyond LOGFS_EMBEDDED_SIZE.
+	 */
+	memset(buf, 0, LOGFS_BLOCKSIZE);
+	memcpy(buf, li->li_data, LOGFS_EMBEDDED_SIZE);
+	block = logfs_write_block(super, buf, 0, inode->i_ino, 0);
+	if (block < 0)
+		return block;
+
+	li->li_data[0] = block;
+
+	li->li_flags &= ~LOGFS_IF_EMBEDDED;
+	for (i=1; i<LOGFS_EMBEDDED_FIELDS; i++)
+		li->li_data[i] = 0;
+
+	return logfs_dirty_inode(inode);
+}
+
+
+/*
+ * Store a small file directly in the inode: copy the first
+ * LOGFS_EMBEDDED_SIZE bytes of @buf into li_data, flag the inode as
+ * LOGFS_IF_EMBEDDED and set its block count to 0.
+ *
+ * The only caller in view, __logfs_write_buf(), invokes this for
+ * i_size <= LOGFS_EMBEDDED_SIZE.
+ *
+ * Returns the result of logfs_dirty_inode().
+ */
+static int logfs_write_embedded(struct inode *inode, void *buf)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	void *dst = li->li_data;
+
+	TRACE();
+	//printk("inode->i_size: %lld\n", i_size_read(inode));
+	/*
+	 * Always copy exactly the embedded area.  The length must never be
+	 * allowed to grow with i_size: taking the larger of the two would
+	 * run past li_data as soon as i_size exceeded LOGFS_EMBEDDED_SIZE.
+	 */
+	memcpy(dst, buf, LOGFS_EMBEDDED_SIZE);
+
+	li->li_flags |= LOGFS_IF_EMBEDDED;
+	logfs_set_blocks(inode, 0);
+
+	return logfs_dirty_inode(inode);
+}
+
+
+/*
+ * Write @buf as data block @index, whose offset lives directly in
+ * li_data[@index].  A block is accounted for first if the slot is still
+ * empty.
+ *
+ * Returns -ENOSPC, a negative value from logfs_write_block(), or the
+ * result of logfs_dirty_inode().
+ */
+static int logfs_write_direct(struct inode *inode, pgoff_t index, void *buf)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+	s64 new_ofs;
+
+	TRACE();
+	if (linode->li_data[index] == 0 && logfs_alloc_blocks(inode, 1))
+		return -ENOSPC;
+
+	new_ofs = logfs_write_block(fs, buf, 0, inode->i_ino, index);
+	if (new_ofs < 0)
+		return new_ofs;
+
+	linode->li_data[index] = new_ofs;
+	return logfs_dirty_inode(inode);
+}
+
+
+/*
+ * Write data block @index of @inode below the indirect tree whose root
+ * offset is kept in li_data[I1_INDEX + @count].
+ *
+ * @buf is the data block to write.  @wblocks supplies one scratch buffer
+ * per indirect level: wblocks[@count] receives the root, wblocks[0] the
+ * lowest indirect block.
+ *
+ * Returns -ENOSPC if logfs_alloc_blocks() fails, an error from mtdread()
+ * or logfs_write_block(), or the result of logfs_dirty_inode().
+ */
+static int logfs_write_loop(struct inode *inode, pgoff_t index, void *buf,
+		be64 **wblocks, int count)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	u64 bofs = li->li_data[I1_INDEX + count];
+	s64 block;
+	int bits = LOGFS_BLOCK_BITS;
+	int allocs = 0;
+	int i, ret;
+
+	/*
+	 * Walk down from the root.  An existing indirect block is read from
+	 * flash; a missing one starts out zeroed and counts as a block that
+	 * has to be allocated.  The entry selected by @index gives the
+	 * offset of the next level down.
+	 */
+	for (i=count; i>=0; i--) {
+		if (bofs) {
+			ret = mtdread(mtd, bofs, LOGFS_BLOCKSIZE, wblocks[i]);
+			if (ret)
+				return ret;
+		} else {
+			allocs++;
+			memset(wblocks[i], 0, LOGFS_BLOCKSIZE);
+		}
+		bofs = be64_to_cpu(wblocks[i][get_bits(index, i*bits, bits)]);
+	}
+
+	/* the data block itself is new if its slot is still empty */
+	if (! wblocks[0][get_bits(index, 0, bits)])
+		allocs++;
+	if (logfs_alloc_blocks(inode, allocs))
+		return -ENOSPC;
+
+	block = logfs_write_block(super, buf, 0, inode->i_ino, index);
+	if (block < 0)
+		return block;
+
+	/*
+	 * Bottom-up: store the child's new offset in its parent, then write
+	 * the parent, whose own new offset goes into the next level up.
+	 */
+	for (i=0; i<=count; i++) {
+		wblocks[i][get_bits(index, i*bits, bits)] = cpu_to_be64(block);
+		block = logfs_write_block(super, wblocks[i], 1, inode->i_ino,
+				index);
+		if (block < 0)
+			return block;
+	}
+
+	/* block now holds the offset of the freshly written root */
+	li->li_data[I1_INDEX + count] = block;
+
+	return logfs_dirty_inode(inode);
+}
+
+
+/*
+ * Write @buf as block @index of @inode and stamp ctime/mtime.
+ *
+ * Files of at most LOGFS_EMBEDDED_SIZE bytes are stored in the inode.
+ * Anything larger first has embedded data moved out, then goes either to
+ * a direct block or through an indirect tree chosen by comparing @index
+ * with the I0..I3_BLOCKS limits.  An index at or beyond I3_BLOCKS is a
+ * BUG().
+ */
+static int __logfs_write_buf(struct inode *inode, pgoff_t index, void *buf,
+		be64 **wblocks)
+{
+	u64 size = i_size_read(inode);
+	int depth;
+	int err;
+
+	TRACE();
+
+	inode->i_mtime.tv_sec = get_seconds();
+	inode->i_ctime.tv_sec = inode->i_mtime.tv_sec;
+
+	if (size <= LOGFS_EMBEDDED_SIZE)
+		return logfs_write_embedded(inode, buf);
+
+	err = logfs_move_embedded(inode, wblocks);
+	if (err)
+		return err;
+
+	if (index < I0_BLOCKS)
+		return logfs_write_direct(inode, index, buf);
+
+	if (index < I1_BLOCKS) {
+		depth = 0;
+	} else if (index < I2_BLOCKS) {
+		depth = 1;
+	} else if (index < I3_BLOCKS) {
+		depth = 2;
+	} else {
+		BUG();
+		return -EIO;
+	}
+
+	return logfs_write_loop(inode, index, buf, wblocks, depth);
+}
+
+
+/*
+ * Write @buf as block @index of @inode, bracketing __logfs_write_buf()
+ * with logfs_get_wblocks()/logfs_put_wblocks() on the superblock.
+ */
+int logfs_write_buf(struct inode *inode, pgoff_t index, void *buf)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+	be64 **scratch = logfs_get_wblocks(fs);
+	int err;
+
+	err = __logfs_write_buf(inode, index, buf, scratch);
+	logfs_put_wblocks(fs);
+
+	return err;
+}
+
+
+/*
+ * Relocate direct block @index: read it from its current offset
+ * li_data[@index] into @buf, write it to a new block and record the new
+ * offset.  The slot must be populated (BUG_ON otherwise).
+ *
+ * Returns an error from mtdread()/logfs_write_block() or the result of
+ * logfs_dirty_inode().
+ */
+static int logfs_rewrite_direct(struct inode *inode, pgoff_t index, void *buf)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+	s64 ofs;
+	int err;
+
+	TRACE();
+	ofs = linode->li_data[index];
+	BUG_ON(! ofs);
+
+	err = mtdread(fs->s_mtd, ofs, LOGFS_BLOCKSIZE, buf);
+	if (err)
+		return err;
+
+	ofs = logfs_write_block(fs, buf, 0, inode->i_ino, index);
+	if (ofs < 0)
+		return ofs;
+
+	linode->li_data[index] = ofs;
+	return logfs_dirty_inode(inode);
+}
+
+
+/*
+ * Relocate data block @index that sits below the indirect tree rooted at
+ * li_data[I1_INDEX + @count], together with every indirect block on the
+ * path to it.
+ *
+ * @buf receives the data block; @wblocks supplies one scratch buffer per
+ * indirect level (wblocks[@count] for the root, wblocks[0] for the lowest
+ * level).  Every block on the path must already exist.
+ *
+ * Returns an error from mtdread()/logfs_write_block() or the result of
+ * logfs_dirty_inode().
+ */
+static int logfs_rewrite_loop(struct inode *inode, pgoff_t index, void *buf,
+		be64 **wblocks, int count)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	u64 bofs = li->li_data[I1_INDEX + count];
+	s64 block;
+	int bits = LOGFS_BLOCK_BITS;
+	int i, err;
+
+	/* read the indirect blocks on the path, root first */
+	for (i=count; i>=0; i--) {
+		if (bofs) {
+			err = mtdread(mtd, bofs, LOGFS_BLOCKSIZE, wblocks[i]);
+			if (err)
+				return err;
+		} else {
+			/* a hole on the path of a block being rewritten */
+			BUG();
+		}
+		bofs = be64_to_cpu(wblocks[i][get_bits(index, i*bits, bits)]);
+	}
+
+	/* current offset of the data block, taken from the lowest level */
+	block = be64_to_cpu(wblocks[0][get_bits(index, 0, bits)]);
+	BUG_ON(! block);
+
+	err = mtdread(mtd, block, LOGFS_BLOCKSIZE, buf);
+	if (err)
+		return err;
+
+	block = logfs_write_block(super, buf, 0, inode->i_ino, index);
+	if (block < 0)
+		return block;
+
+	/*
+	 * Bottom-up: store the child's new offset in its parent, then write
+	 * the parent, whose own new offset goes into the next level up.
+	 */
+	for (i=0; i<=count; i++) {
+		wblocks[i][get_bits(index, i*bits, bits)] = cpu_to_be64(block);
+		block = logfs_write_block(super, wblocks[i], 1, inode->i_ino,
+				index);
+		if (block < 0)
+			return block;
+	}
+
+	/* block now holds the offset of the freshly written root */
+	li->li_data[I1_INDEX + count] = block;
+
+	return logfs_dirty_inode(inode);
+}
+
+
+/*
+ * Relocate block @index of @inode, picking the direct or the indirect
+ * variant by comparing @index with the I0..I3_BLOCKS limits.  An index at
+ * or beyond I3_BLOCKS is a BUG().
+ */
+static int __logfs_rewrite_block(struct inode *inode, pgoff_t index, void *buf,
+		be64 **wblocks)
+{
+	int depth;
+
+	TRACE();
+
+	if (index < I0_BLOCKS)
+		return logfs_rewrite_direct(inode, index, buf);
+
+	if (index < I1_BLOCKS) {
+		depth = 0;
+	} else if (index < I2_BLOCKS) {
+		depth = 1;
+	} else if (index < I3_BLOCKS) {
+		depth = 2;
+	} else {
+		BUG();
+		return -EIO;
+	}
+
+	return logfs_rewrite_loop(inode, index, buf, wblocks, depth);
+}
+
+
+/*
+ * Relocate block @index of @inode using the superblock's write buffers.
+ *
+ * @ofs is not used, and the caller's @buf is replaced by the spare buffer
+ * s_wblock[LOGFS_MAX_INDIRECT].
+ * NOTE(review): unlike logfs_write_buf() this does not go through
+ * logfs_get_wblocks()/logfs_put_wblocks() - presumably the caller already
+ * holds them; confirm.
+ */
+int logfs_rewrite_block(struct inode *inode, pgoff_t index, u64 ofs, void *buf)
+{
+	be64 **scratch = LOGFS_SUPER(inode->i_sb)->s_wblock;
+
+	buf = scratch[LOGFS_MAX_INDIRECT];
+	return __logfs_rewrite_block(inode, index, buf, scratch);
+}
+
+
+/**
+ * Truncate one data block located at flash offset @bofs, file position @pos.
+ *
+ * Three cases exist:
+ * size <= pos			- remove full block, return 0
+ * size >= pos + chunk		- do nothing (not handled here; the callers
+ *				  are expected to stop before such blocks)
+ * pos < size < pos + chunk	- keep the first size - pos bytes, zero the
+ *				  rest, rewrite, return the new offset
+ *
+ * wblocks[LOGFS_MAX_INDIRECT] serves as scratch space for the rewrite.
+ * Negative return values are errors.
+ */
+static s64 __logfs_truncate_i0(struct inode *inode, u64 size, u64 bofs,
+		u64 pos, be64 **wblocks)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+	void *scratch = wblocks[LOGFS_MAX_INDIRECT];
+	size_t keep;
+	int err;
+
+	if (size <= pos) {
+		/* block lies completely beyond the new size */
+		logfs_remove_blocks(inode, 1, bofs);
+		return 0;
+	}
+
+	/* read only the surviving head into an otherwise zeroed block */
+	keep = size - pos;
+	memset(scratch, 0, LOGFS_BLOCKSIZE);
+	err = mtdread(fs->s_mtd, bofs, keep, scratch);
+	if (err)
+		return err;
+
+	return __logfs_write_block(fs, scratch, 0, inode->i_ino, pos);
+}
+
+
+/* FIXME: move to super */
+/*
+ * Bytes of file covered by a single entry at each level, as used by the
+ * truncate code: entry e of a block at level i starts at
+ * pos + e * logfs_factor[i].  __logfs_truncate() also uses entries 1..3 as
+ * the size limits above which a given indirect tree is left untouched.
+ */
+static u64 logfs_factor[] = {
+	LOGFS_BLOCKSIZE,
+	LOGFS_I1_SIZE,
+	LOGFS_I2_SIZE,
+	LOGFS_I3_SIZE
+};
+
+
+/*
+ * Per-level threshold used by __logfs_truncate_loop(): an indirect block at
+ * level i is dropped entirely once the new size is no larger than
+ * max(pos, logfs_start[i]).
+ * NOTE(review): presumably the file offset at which the level-i tree
+ * begins - confirm against the LOGFS_I*_SIZE definitions.
+ */
+static u64 logfs_start[] = {
+	LOGFS_I0_SIZE,
+	LOGFS_I1_SIZE,
+	LOGFS_I2_SIZE,
+	LOGFS_I3_SIZE
+};
+
+
+/*
+ * One recursion per indirect block.  Logfs supports 5fold indirect blocks.
+ * NOTE(review): the callers visible in this file only pass levels 0..2 -
+ * confirm the "5fold" figure.
+ *
+ * Truncate the subtree below the indirect block stored at @old_bofs, which
+ * sits at level @i and covers the file from @pos on.  wblocks[@i] is used
+ * to hold this indirect block; lower levels use the lower wblocks entries.
+ *
+ * Returns 0 if the indirect block was removed altogether, the offset of its
+ * rewritten copy otherwise, or a negative error.
+ */
+static s64 __logfs_truncate_loop(struct inode *inode, u64 size, u64 old_bofs,
+		u64 pos, be64 **wblocks, int i)
+{
+	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
+	struct mtd_info *mtd = super->s_mtd;
+	s64 ofs;
+	int e, ret;
+
+	ret = mtdread(mtd, old_bofs, LOGFS_BLOCKSIZE, wblocks[i]);
+	if (ret)
+		return ret;
+
+	/* visit the entries from the end of the block towards the front */
+	for (e = LOGFS_BLOCK_FACTOR-1; e>=0; e--) {
+		u64 bofs;
+		u64 new_pos = pos + e*logfs_factor[i];
+
+		/* this entry, and all before it, lie below the new size */
+		if (size >= new_pos + logfs_factor[i])
+			break;
+
+		bofs = be64_to_cpu(wblocks[i][e]);
+		if (!bofs)
+			continue;
+
+		BUG_ON(bofs > super->s_size);
+
+		if (i)
+			ofs = __logfs_truncate_loop(inode, size, bofs, new_pos,
+					wblocks, i-1);
+		else
+			ofs = __logfs_truncate_i0(inode, size, bofs, new_pos,
+					wblocks);
+		if (ofs < 0)
+			return ofs;
+
+		/* 0 if the child is gone, else the child's new offset */
+		wblocks[i][e] = cpu_to_be64(ofs);
+	}
+
+	if (size <= max(pos, logfs_start[i])) {
+		/* complete indirect block is removed */
+		logfs_remove_blocks(inode, 1, old_bofs);
+		return 0;
+	}
+
+	/* partially removed - write back */
+	return __logfs_write_block(super, wblocks[i], 1, inode->i_ino, pos);
+}
+
+
+/*
+ * Truncate the direct blocks li_data[0 .. I1_INDEX-1] of @inode to @size.
+ *
+ * Blocks are visited from the last one towards the front.  Each populated
+ * block that is not entirely below @size is handed to
+ * __logfs_truncate_i0(), which either removes it or rewrites it with a
+ * zeroed tail; the slot is updated with what it returns.
+ *
+ * Returns 0 or a negative error from __logfs_truncate_i0().
+ */
+static int logfs_truncate_direct(struct inode *inode, u64 size, be64 **wblocks)
+{
+	struct logfs_inode *li = LOGFS_INODE(inode);
+	int e;
+	s64 bofs, ofs;
+
+	for (e = I1_INDEX-1; e>=0; e--) {
+		u64 new_pos = e*logfs_factor[0];
+
+		/*
+		 * Stop only once a block lies completely below the new
+		 * size - the same rule __logfs_truncate_loop() applies.  A
+		 * block that merely straddles @size must still be passed
+		 * on so that its tail gets cleared.
+		 */
+		if (size >= new_pos + logfs_factor[0])
+			break;
+
+		bofs = li->li_data[e];
+		if (!bofs)
+			continue;
+
+		ofs = __logfs_truncate_i0(inode, size, bofs, new_pos, wblocks);
+		if (ofs < 0)
+			return ofs;
+
+		li->li_data[e] = ofs;
+	}
+	return 0;
+}
+
+
+/*
+ * Truncate the indirect tree rooted at li_data[I1_INDEX + @i] to @size and
+ * store the root's new offset (0 if the tree is gone).  An empty root slot
+ * means there is nothing to do.
+ *
+ * Returns 0 or a negative error from __logfs_truncate_loop().
+ */
+static int logfs_truncate_loop(struct inode *inode, u64 size, be64 **wblocks,
+		int i)
+{
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+	u64 root = linode->li_data[I1_INDEX + i];
+	s64 new_root;
+
+	if (root == 0)
+		return 0;
+
+	new_root = __logfs_truncate_loop(inode, size, root, 0, wblocks, i);
+	if (new_root < 0)
+		return new_root;
+
+	linode->li_data[I1_INDEX + i] = new_root;
+	return 0;
+}
+
+
+/*
+ * Truncate embedded file data: zero li_data from byte @size up to
+ * LOGFS_EMBEDDED_SIZE.  Sizes at or beyond the embedded area leave the
+ * data alone.
+ */
+static void logfs_truncate_embedded(struct inode *inode, u64 size)
+{
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+
+	TRACE();
+	if (size >= LOGFS_EMBEDDED_SIZE)
+		return;
+
+	memset((void*)linode->li_data + size, 0, LOGFS_EMBEDDED_SIZE - size);
+}
+
+
+/*
+ * Cut the block tree of @inode down to its current i_size.
+ *
+ * Embedded inodes only get their inline data trimmed.  For all others the
+ * indirect trees are handled from the deepest (2) to the shallowest (0);
+ * work stops as soon as the size reaches logfs_factor[level + 1], because
+ * then neither that tree nor anything below it is affected.  The direct
+ * blocks come last.  Any failure is a BUG.
+ *
+ * TODO: might make sense to turn inode into embedded again
+ */
+static void __logfs_truncate(struct inode *inode, be64 **wblocks)
+{
+	struct logfs_inode *linode = LOGFS_INODE(inode);
+	u64 size = i_size_read(inode);
+	int level;
+	int err;
+
+	if (linode->li_flags & LOGFS_IF_EMBEDDED) {
+		logfs_truncate_embedded(inode, size);
+		return;
+	}
+
+	for (level = 2; level >= 0; level--) {
+		if (size >= logfs_factor[level + 1])
+			return;
+		err = logfs_truncate_loop(inode, size, wblocks, level);
+		BUG_ON(err);
+	}
+
+	err = logfs_truncate_direct(inode, size, wblocks);
+	BUG_ON(err);
+}
+
+
+/*
+ * Truncate @inode to its current i_size: run __logfs_truncate() between
+ * logfs_get_wblocks()/logfs_put_wblocks(), then mark the inode dirty.
+ */
+void logfs_truncate(struct inode *inode)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+	be64 **scratch = logfs_get_wblocks(fs);
+
+	__logfs_truncate(inode, scratch);
+	logfs_put_wblocks(fs);
+
+	mark_inode_dirty(inode);
+}
+
+
+/*
+ * __logfs_inode_write - write part of one block from an in-kernel buffer
+ * @inode:	inode to write to
+ * @buf:	kernel buffer holding the data
+ * @count:	number of bytes to write
+ * @ppos:	file position, advanced by @count on success
+ *
+ * Read-modify-write of a single block: the block is read into a bounce
+ * buffer, patched with @buf and written back with logfs_write_buf().
+ * i_size is raised first if the write extends the file.  The range must
+ * not cross a block boundary (BUG_ON otherwise).
+ *
+ * Returns @count on success, -ENOMEM if the bounce buffer cannot be
+ * allocated, or the error from logfs_read_block()/logfs_write_buf().
+ */
+static ssize_t __logfs_inode_write(struct inode *inode, const char *buf,
+		size_t count, loff_t *ppos)
+{
+	void *block_data = NULL;
+	int err = -ENOMEM;
+
+	TRACE();
+	pr_debug("write to 0x%llx, count %zu\n", *ppos, count);
+
+	/*
+	 * An empty write changes nothing.  Leave before the boundary check
+	 * below: with count == 0, "*ppos + count - 1" names the previous
+	 * block (or -1 at position 0) and the BUG_ON would fire.
+	 */
+	if (!count)
+		return 0;
+
+	BUG_ON(logfs_index(*ppos) != logfs_index(*ppos + count - 1));
+
+	block_data = kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
+	if (!block_data)
+		goto fail;
+
+	err = logfs_read_block(inode, logfs_index(*ppos), block_data);
+	if (err)
+		goto fail;
+
+	memcpy(block_data + (*ppos % LOGFS_BLOCKSIZE), buf, count);
+
+	if (i_size_read(inode) < *ppos + count)
+		i_size_write(inode, *ppos + count);
+
+	err = logfs_write_buf(inode, logfs_index(*ppos), block_data);
+	if (err)
+		goto fail;
+
+	*ppos += count;
+	pr_debug("write to %lld, count %zu\n", *ppos, count);
+	kfree(block_data);
+	return count;
+fail:
+	kfree(block_data);	/* kfree(NULL) is fine on the -ENOMEM path */
+	return err;
+}
+
+
+/*
+ * Read exactly @n bytes at @_pos from @inode into @buf.
+ *
+ * Returns 0 if all @n bytes were read, -EOF if @_pos is at or past i_size,
+ * and -EIO for anything else (short read or error from the lower layer).
+ */
+int logfs_inode_read(struct inode *inode, void *buf, size_t n, loff_t _pos)
+{
+	loff_t cursor = _pos;
+	ssize_t done;
+
+	TRACE();
+	if (cursor >= i_size_read(inode))
+		return -EOF;
+
+	done = __logfs_inode_read(inode, buf, n, &cursor);
+	if (done == n)
+		return 0;
+	return -EIO;
+}
+
+
+/*
+ * Write exactly @n bytes from @buf at @_pos without taking
+ * s_write_inode_mutex.
+ *
+ * Returns 0 if all @n bytes were written, -EIO otherwise.
+ */
+int logfs_inode_write_nolock(struct inode *inode, const void *buf, size_t n,
+		loff_t _pos)
+{
+	loff_t cursor = _pos;
+	ssize_t done;
+
+	TRACE();
+	done = __logfs_inode_write(inode, buf, n, &cursor);
+	if (done == n)
+		return 0;
+	return -EIO;
+}
+
+
+/*
+ * Same as logfs_inode_write_nolock(), but performed while holding the
+ * superblock's s_write_inode_mutex.
+ */
+int logfs_inode_write(struct inode *inode, const void *buf, size_t n,
+		loff_t pos)
+{
+	struct logfs_super *fs = LOGFS_SUPER(inode->i_sb);
+	int err;
+
+	mutex_lock(&fs->s_write_inode_mutex);
+	err = logfs_inode_write_nolock(inode, buf, n, pos);
+	mutex_unlock(&fs->s_write_inode_mutex);
+
+	return err;
+}
+
+
+/*
+ * Write @n bytes from @buf at @_pos in pieces of at most LOGFS_BLOCKSIZE,
+ * each through logfs_inode_write().  Stops at the first error.
+ * NOTE(review): the pieces only stay within one block each if @_pos is
+ * block aligned - confirm that callers guarantee this.
+ */
+int logfs_inode_write_loop(struct inode *inode, const void *buf, size_t n,
+		loff_t _pos)
+{
+	loff_t cursor = _pos;
+	int err;
+
+	for (; n > LOGFS_BLOCKSIZE; n -= LOGFS_BLOCKSIZE) {
+		err = logfs_inode_write(inode, buf, LOGFS_BLOCKSIZE, cursor);
+		if (err)
+			return err;
+		cursor += LOGFS_BLOCKSIZE;
+		buf += LOGFS_BLOCKSIZE;
+	}
+
+	/* final piece of at most one block */
+	return logfs_inode_write(inode, buf, n, cursor);
+}
+
+
+/*
+ * Set up the read/write machinery of @super: the two mutexes, the read
+ * buffer s_rblock and the LOGFS_MAX_INDIRECT + 1 write buffers s_wblock[].
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing stays allocated.
+ */
+int logfs_init_rw(struct logfs_super *super)
+{
+	int i;
+
+	mutex_init(&super->s_r_sem);
+	mutex_init(&super->s_w_sem);
+	super->s_rblock = kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
+	/* check the buffer that was just allocated */
+	if (!super->s_rblock)
+		return -ENOMEM;
+	for (i=0; i<=LOGFS_MAX_INDIRECT; i++) {
+		super->s_wblock[i] = kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
+		/* check this slot, not the array as a whole */
+		if (!super->s_wblock[i]) {
+			/*
+			 * Frees every slot, including ones not reached yet.
+			 * Those must still be NULL, which holds for the
+			 * kzalloc()ed super set up by logfs_get_sb_mtd().
+			 */
+			logfs_cleanup_rw(super);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+
+/* Free all write buffers s_wblock[] and the read buffer s_rblock. */
+void logfs_cleanup_rw(struct logfs_super *super)
+{
+	int slot = 0;
+
+	while (slot <= LOGFS_MAX_INDIRECT)
+		kfree(super->s_wblock[slot++]);
+
+	kfree(super->s_rblock);
+}
--- /dev/null	2006-08-15 20:12:53.000000000 +0200
+++ logfs3/fs/logfs/super.c	2006-08-24 15:39:42.000000000 +0200
@@ -0,0 +1,435 @@
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/mtd/mtd.h>
+#include <linux/pagemap.h>
+#include <linux/statfs.h>
+
+#include "logfs.h"
+
+
+/* Return -EINVAL from the enclosing function if @cond is true. */
+#define FAIL_ON(cond) do { if (unlikely((cond))) return -EINVAL; } while(0)
+
+/*
+ * Read @len bytes at offset @ofs of @mtd into @buf.
+ *
+ * Returns 0 on success.  A driver error or a short read is logged together
+ * with a stack dump and reported as -EIO.
+ */
+int mtdread(struct mtd_info *mtd, loff_t ofs, size_t len, void *buf)
+{
+	size_t retlen;
+	int ret;
+
+	TRACE();
+	ret = mtd->read(mtd, ofs, len, &retlen, buf);
+	if (ret || (retlen != len)) {
+		printk("ret: %d\n", ret);
+		/* size_t needs %zu; %d is wrong wherever size_t is wider */
+		printk("retlen: %zu, len: %zu\n", retlen, len);
+		printk("ofs: %lld, mtd->size: %d\n", ofs, mtd->size);
+		dump_stack();
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Write @len bytes from @buf at offset @ofs of @mtd.  A range that is not
+ * fully inside the device is a BUG.
+ *
+ * Returns 0 on success, -EIO on a driver error or a short write.
+ */
+int mtdwrite(struct mtd_info *mtd, loff_t ofs, size_t len, void *buf)
+{
+	size_t written;
+	int err;
+
+	TRACE();
+	//printk("write ofs=%llx, len=%x\n", ofs, len);
+	BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs));
+
+	err = mtd->write(mtd, ofs, len, &written, buf);
+	if (err)
+		return -EIO;
+	if (written != len)
+		return -EIO;
+
+	return 0;
+}
+
+
+/*
+ * Erase completion callback: wake the mtderase() call that is waiting on
+ * the completion whose address travels in ei->priv.
+ */
+static void logfs_erase_callback(struct erase_info *ei)
+{
+	complete((struct completion *)ei->priv);
+}
+/*
+ * Erase @len bytes at offset @ofs of @mtd and wait until the erase has
+ * finished.  @len must be a multiple of the erase size (BUG_ON otherwise).
+ *
+ * A completion on the stack is used for the wait.  A mutex is unsuitable
+ * here: it would be released by the callback, i.e. by a context that does
+ * not hold it, and an initially unlocked mutex would not make the first
+ * caller wait at all.
+ *
+ * Returns 0 on success, -EIO if the driver rejects the request or reports
+ * the erase as failed.
+ */
+int mtderase(struct mtd_info *mtd, loff_t ofs, size_t len)
+{
+	struct erase_info ei;
+	struct completion done;
+	int ret;
+
+	TRACE();
+	BUG_ON(len % mtd->erasesize);
+
+	init_completion(&done);
+
+	memset(&ei, 0, sizeof(ei));
+	ei.mtd = mtd;
+	ei.addr = ofs;
+	ei.len = len;
+	ei.callback = logfs_erase_callback;
+	ei.priv = (u_long)&done;
+	ret = mtd->erase(mtd, &ei);
+	if (ret)
+		return -EIO;
+
+	/* ei and done live on this stack frame: do not return early */
+	wait_for_completion(&done);
+	if (ei.state == MTD_ERASE_FAILED)
+		return -EIO;
+	return 0;
+}
+
+
+/*
+ * Fill @stats for the filesystem @dentry lives on.  Block totals come from
+ * s_size, free and available blocks both from s_free; no inode counts are
+ * reported.  Always returns 0.
+ */
+int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
+{
+	struct logfs_super *fs = LOGFS_SUPER(dentry->d_sb);
+
+	stats->f_type	= LOGFS_MAGIC_U32;
+	stats->f_namelen= LOGFS_MAX_NAMELEN;
+
+	stats->f_bsize	= LOGFS_BLOCKSIZE;
+	stats->f_blocks	= fs->s_size >> LOGFS_BLOCK_BITS >> 3;
+	stats->f_bfree	= fs->s_free;
+	stats->f_bavail	= fs->s_free; /* FIXME: leave some for root */
+
+	stats->f_files	= 0;
+	stats->f_ffree	= 0;
+
+	return 0;
+}
+
+
+/*
+ * sget() "set" callback: attach the logfs_super passed as @_super to @sb
+ * and derive s_dev from MTD_BLOCK_MAJOR and the MTD device index.
+ * Always returns 0.
+ */
+static int logfs_sb_set(struct super_block *sb, void *_super)
+{
+	struct logfs_super *fs = _super;
+
+	TRACE();
+	sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, fs->s_mtd->index);
+	sb->s_fs_info = fs;
+
+	return 0;
+}
+
+
+/*
+ * Last step of mounting: load the root directory inode, create the root
+ * dentry and attach @sb to @mnt.
+ *
+ * On failure the root inode (if any) and s_master_inode are put and -EIO
+ * is returned.
+ *
+ * FIXME: the error recovery logic appears to be wrong.  redo.
+ */
+static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
+{
+	struct inode *root;
+
+	TRACE();
+
+	/* root dir */
+	root = iget(sb, LOGFS_INO_ROOT);
+	if (root) {
+		sb->s_root = d_alloc_root(root);
+		if (sb->s_root)
+			return simple_set_mnt(mnt, sb);
+
+		iput(root);
+	}
+
+	iput(LOGFS_SUPER(sb)->s_master_inode);
+	return -EIO;
+}
+
+
+/*
+ * mkfs helper: write the on-disk inode of the root directory (valid flag,
+ * mode S_IFDIR | 0755, refcount 2) at offset
+ * 3 * @seg_size + LOGFS_INO_ROOT * sizeof(struct logfs_disk_inode).
+ * @ds is not used.
+ *
+ * Returns -ENOMEM or the result of mtdwrite().
+ */
+static int logfs_mkfs_rootdir(struct logfs_super *super,
+		struct logfs_disk_super *ds, u32 seg_size)
+{
+	struct logfs_disk_inode *root;
+	u64 ofs;
+	int err;
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (root == NULL)
+		return -ENOMEM;
+
+	ofs = 3*seg_size + LOGFS_INO_ROOT*sizeof(*root);
+
+	root->di_flags		= cpu_to_be32(LOGFS_IF_VALID);
+	root->di_mode		= cpu_to_be16(S_IFDIR | 0755);
+	root->di_refcount	= cpu_to_be32(2);
+
+	err = mtdwrite(super->s_mtd, ofs, sizeof(*root), root);
+	kfree(root);
+	return err;
+}
+
+
+/*
+ * mkfs helper: write a summary block into the last LOGFS_BLOCKSIZE bytes
+ * before offset 4 * @seg_size.  The block is filled with 0xff except for
+ * its first entry, which names position 0 of the master inode, and its
+ * level field, set to LOGFS_MAX_LEVELS.  @ds is not used.
+ *
+ * Returns -ENOMEM or the result of mtdwrite().
+ */
+static int logfs_mkfs_summary(struct logfs_super *super,
+		struct logfs_disk_super *ds, u32 seg_size)
+{
+	struct logfs_disk_sum *summary;
+	u64 ofs;
+	int err;
+
+	summary = kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
+	if (summary == NULL)
+		return -ENOMEM;
+
+	ofs = 4*seg_size - LOGFS_BLOCKSIZE;
+
+	memset(summary, 0xff, LOGFS_BLOCKSIZE);
+	summary->level = LOGFS_MAX_LEVELS;
+	summary->blocks[0].ino = cpu_to_be64(LOGFS_INO_MASTER);
+	summary->blocks[0].pos = cpu_to_be64(0);
+
+	err = mtdwrite(super->s_mtd, ofs, LOGFS_BLOCKSIZE, summary);
+	kfree(summary);
+	return err;
+}
+
+
+/*
+ * mkfs helper: write the initial JE_ANCHOR journal entry at offset
+ * @seg_size.  The anchor sizes the master inode's file to hold inodes up
+ * to and including LOGFS_INO_ROOT and points its first data slot at
+ * 3 * @seg_size.  @ds is not used.
+ *
+ * Returns -ENOMEM or the result of mtdwrite().
+ */
+static int logfs_mkfs_anchor(struct logfs_super *super,
+		struct logfs_disk_super *ds, u32 seg_size)
+{
+	struct logfs_journal_entry *anchor;
+	int err;
+
+	anchor = kzalloc(sizeof(*anchor), GFP_KERNEL);
+	if (anchor == NULL)
+		return -ENOMEM;
+
+	anchor->je_type = cpu_to_be16(JE_ANCHOR);
+	anchor->je_version = cpu_to_be64(1);
+
+	anchor->da.da_gec = 0;
+	anchor->da.da_maxec = 0;
+	anchor->da.da_sweeper = cpu_to_be64(4);
+	anchor->da.da_last_ino = cpu_to_be64(LOGFS_RESERVED_INOS);
+	anchor->da.da_size = cpu_to_be64((LOGFS_INO_ROOT+1)
+			* sizeof(struct logfs_disk_inode));
+	anchor->da.da_data[0] = cpu_to_be64(3*seg_size);
+
+	err = mtdwrite(super->s_mtd, seg_size, sizeof(*anchor), anchor);
+	kfree(anchor);
+	return err;
+}
+
+
+/*
+ * mkfs helper: fill in the on-disk superblock @ds and write it at offset 0.
+ *
+ * The journal is placed at @seg_size with a length of two segments.  The
+ * filesystem size is the device size rounded down to a whole number of
+ * segments.  ds_sum_start is the block size minus the space taken by
+ * LOGFS_SEGMENTS * (blocks per segment) summary entries and must not be
+ * below 2048 (BUG_ON otherwise).
+ *
+ * Returns the result of mtdwrite().
+ */
+static int logfs_mkfs_super(struct logfs_super *super,
+		struct logfs_disk_super *ds, u32 seg_size, u32 block_size)
+{
+	u64 no_segments;	/* computed, but not used any further */
+	u64 size;
+	u32 remainder;
+	int sum_start;
+	int no_blocks;
+
+	ds->ds_magic = cpu_to_be64(LOGFS_MAGIC);
+	ds->ds_block_size = cpu_to_be32(block_size);
+	ds->ds_segment_size = cpu_to_be32(seg_size);
+
+	ds->ds_journal_len = cpu_to_be64(2*seg_size);
+	ds->ds_journal_ofs = cpu_to_be64(seg_size);
+
+	ds->ds_root_reserve = 0;
+
+	/* size must be a multiple of seg_size: drop the remainder */
+	size = super->s_mtd->size;
+	remainder = do_div(size, seg_size);
+	no_segments = size;
+	size = super->s_mtd->size - remainder;
+	ds->ds_filesystem_size = cpu_to_be64(size);
+
+#if 0	/* sane defaults */
+	ds->ds_ifile_levels	= 3; /* 2+1, 1GiB */
+	ds->ds_iblock_levels	= 4; /* 3+1, 512GiB */
+	ds->ds_data_levels	= 3; /* old, young, unknown */
+#else
+	ds->ds_ifile_levels	= 1; /* 0+1, 80kiB */
+	ds->ds_iblock_levels	= 4; /* 3+1, 512GiB */
+	ds->ds_data_levels	= 1; /* unknown */
+#endif
+
+	ds->ds_anchor_size = cpu_to_be32(block_size);
+
+	no_blocks = seg_size / block_size;
+	sum_start  = block_size;
+	sum_start -= LOGFS_SEGMENTS * no_blocks * sizeof(struct logfs_block);
+	BUG_ON(sum_start < 2048);
+	ds->ds_sum_start = cpu_to_be32(sum_start);
+
+	return mtdwrite(super->s_mtd, 0, sizeof(*ds), ds);
+}
+
+
+/*
+ * Create a fresh filesystem with 32KiB segments and 4KiB blocks: root
+ * directory inode, summary, anchor and finally the superblock, in that
+ * order.  Stops at the first failing step and returns its error, else 0.
+ */
+static int logfs_mkfs(struct logfs_super *super, struct logfs_disk_super *ds)
+{
+	u32 block_size = 1<<12;
+	u32 seg_size = 1<<15;
+	int err;
+
+	TRACE();
+
+	err = logfs_mkfs_rootdir(super, ds, seg_size);
+	if (!err)
+		err = logfs_mkfs_summary(super, ds, seg_size);
+	if (!err)
+		err = logfs_mkfs_anchor(super, ds, seg_size);
+	if (!err)
+		err = logfs_mkfs_super(super, ds, seg_size, block_size);
+
+	return err;
+}
+
+
+/*
+ * Read the on-disk superblock, creating a filesystem first if the magic
+ * does not match, copy its fields into the in-memory logfs_super and bring
+ * up the rw, segment, log and gc subsystems.
+ *
+ * Returns 0 on success.  On failure every subsystem that had already been
+ * initialised is torn down again, in reverse order, before the error is
+ * returned.
+ */
+static int logfs_read_sb(struct super_block *sb)
+{
+	struct logfs_super *super = LOGFS_SUPER(sb);
+	struct logfs_disk_super ds;
+	int ret;
+
+	TRACE();
+	ret = mtdread(super->s_mtd, 0, sizeof(ds), &ds);
+	if (ret)
+		return ret;
+
+	/* no logfs here yet: make one; logfs_mkfs() fills in ds */
+	if (be64_to_cpu(ds.ds_magic) != LOGFS_MAGIC) {
+		ret = logfs_mkfs(super, &ds);
+		if (ret)
+			return ret;
+	}
+	super->s_segsize = be32_to_cpu(ds.ds_segment_size);
+	super->s_blocksize = be32_to_cpu(ds.ds_block_size);
+	super->s_no_blocks = super->s_segsize / super->s_blocksize;
+
+	super->s_log_ofs = be64_to_cpu(ds.ds_journal_ofs);
+	super->s_log_len = be64_to_cpu(ds.ds_journal_len);
+	super->s_size = be64_to_cpu(ds.ds_filesystem_size);
+	super->s_anchor_size = be32_to_cpu(ds.ds_anchor_size);
+	super->s_sum_start = be32_to_cpu(ds.ds_sum_start);
+	super->s_root_reserve = be64_to_cpu(ds.ds_root_reserve);
+
+
+	super->s_ifile_levels = ds.ds_ifile_levels;
+	super->s_iblock_levels = ds.ds_iblock_levels;
+	super->s_data_levels = ds.ds_data_levels;
+	super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
+		+ super->s_data_levels;
+	super->s_gc_reserve = super->s_total_levels * (2*super->s_no_blocks -1);
+
+	/* FIXME: store data in flash superblock */
+	mutex_init(&super->s_write_inode_mutex);
+
+	ret = logfs_init_rw(super);
+	if (ret)
+		return ret;
+
+	ret = logfs_init_segments(super);
+	if (ret)
+		goto out_rw;
+
+	ret = logfs_init_log(sb);
+	if (ret)
+		goto out_segments;
+
+	ret = logfs_init_gc(super);
+	if (ret)
+		goto out_log;
+
+	spin_lock_init(&super->s_ino_lock);
+	return 0;
+
+	/*
+	 * Unwind with the same cleanup calls, in the same order, that
+	 * logfs_kill_sb() uses.  The caller only frees super itself on
+	 * error, so anything left behind here would leak.
+	 */
+out_log:
+	logfs_cleanup_log(sb);
+out_segments:
+	logfs_cleanup_segments(super);
+out_rw:
+	logfs_cleanup_rw(super);
+	return ret;
+}
+
+
+/*
+ * Unmount: shut the generic superblock down, tear down the gc, log,
+ * segment and rw subsystems (reverse of the order in logfs_read_sb()),
+ * release the MTD device and free the logfs_super.
+ */
+static void logfs_kill_sb(struct super_block *sb)
+{
+	struct logfs_super *fs = LOGFS_SUPER(sb);
+
+	TRACE();
+	generic_shutdown_super(sb);
+
+	logfs_cleanup_gc(fs);
+	logfs_cleanup_log(sb);
+	logfs_cleanup_segments(fs);
+	logfs_cleanup_rw(fs);
+
+	put_mtd_device(fs->s_mtd);
+	kfree(fs);
+}
+
+
+/*
+ * Mount logfs from the MTD device @mtd: allocate the logfs_super, obtain a
+ * super_block through sget(), read (or create) the on-disk superblock and
+ * finish with logfs_get_sb_final().
+ *
+ * Failures before logfs_get_sb_final() free the logfs_super, drop the
+ * reference on @mtd and return -ENOMEM (allocation), -EINVAL (sget) or the
+ * error from logfs_read_sb().
+ * NOTE(review): the super_block obtained from sget() is not released on
+ * the logfs_read_sb() error path - confirm whether that is intended.
+ */
+static int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+		struct mtd_info *mtd, struct vfsmount *mnt)
+{
+	struct logfs_super *super;
+	struct super_block *sb;
+	int err;
+
+	TRACE();
+	super = kzalloc(sizeof(*super), GFP_KERNEL);
+	if (!super) {
+		err = -ENOMEM;
+		goto err0;
+	}
+	super->s_mtd = mtd;
+
+	sb = sget(type, NULL, logfs_sb_set, super);
+	if (IS_ERR(sb)) {
+		err = -EINVAL;
+		goto err0;
+	}
+
+	sb->s_maxbytes	= LOGFS_I3_SIZE;
+	sb->s_op	= &logfs_super_operations;
+	sb->s_flags	= flags | MS_NOATIME;
+
+	super->s_sb = sb;
+	err = logfs_read_sb(sb);
+	if (err)
+		goto err0;
+
+	sb->s_flags |= MS_ACTIVE;
+	return logfs_get_sb_final(sb, mnt);
+
+err0:
+	kfree(super);		/* NULL if the allocation itself failed */
+	put_mtd_device(mtd);
+	return err;
+}
+
+
+/*
+ * get_sb entry point.  Parsing of "mtdN" device names is compiled out for
+ * now, so MTD device 0 is always used; @devname and @data are ignored.
+ *
+ * Returns -EINVAL if the device cannot be obtained, else the result of
+ * logfs_get_sb_mtd().
+ */
+static int logfs_get_sb(struct file_system_type *type, int flags,
+		const char *devname, void *data, struct vfsmount *mnt)
+{
+	struct mtd_info *mtd;
+	ulong mtdnr;
+
+	TRACE();
+#if 0
+	if (!devname)
+		return ERR_PTR(-EINVAL);
+	if (strncmp(devname, "mtd", 3))
+		return ERR_PTR(-EINVAL);
+
+	{
+		char *garbage;
+		mtdnr = simple_strtoul(devname+3, &garbage, 0);
+		if (*garbage)
+			return ERR_PTR(-EINVAL);
+	}
+#else
+	mtdnr = 0;
+#endif
+
+	mtd = get_mtd_device(NULL, mtdnr);
+	if (mtd)
+		return logfs_get_sb_mtd(type, flags, mtd, mnt);
+
+	return -EINVAL;
+}
+
+
+/*
+ * Filesystem type registered by logfs_init(): mounts go through
+ * logfs_get_sb(), teardown through logfs_kill_sb().
+ */
+static struct file_system_type logfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "logfs",
+	.get_sb		= logfs_get_sb,
+	.kill_sb	= logfs_kill_sb,
+};
+
+
+/*
+ * Module init: create the inode cache, then register the filesystem type.
+ *
+ * Returns 0 on success or the error of the failing step.  If registration
+ * fails the inode cache is destroyed again so that nothing is left behind.
+ */
+static int __init logfs_init(void)
+{
+	int ret = logfs_init_inode_cache();
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&logfs_fs_type);
+	if (ret)
+		logfs_destroy_inode_cache();
+	return ret;
+}
+
+
+/*
+ * Module exit: undo logfs_init() in reverse order - unregister the
+ * filesystem type first, then destroy the inode cache.
+ */
+static void __exit logfs_exit(void)
+{
+	unregister_filesystem(&logfs_fs_type);
+	logfs_destroy_inode_cache();
+}
+
+
+/* Module entry and exit points. */
+module_init(logfs_init);
+module_exit(logfs_exit);




More information about the linux-mtd mailing list