[PATCH 19/20] Add file data and debugfs read/write commands

Valerie Aurora val at versity.com
Thu Jun 12 13:11:11 PDT 2025


Implement ngnfs_data_read() and ngnfs_data_write() and corresponding
debugfs commands. Also copy over const_ilog2() and related functions
from the kernel.

Co-authored-by: Zach Brown <zab at zabbo.net>
Signed-off-by: Valerie Aurora <val at versity.com>
---
 cli/debugfs.c         | 174 +++++++++++++++++
 shared/data.c         | 445 ++++++++++++++++++++++++++++++++++++++++++
 shared/data.h         |  18 ++
 shared/format-block.h |  35 ++++
 shared/inode.c        |   1 +
 shared/lk/fls.h       |  68 +++++++
 shared/lk/log2.h      | 258 ++++++++++++++++++++++++
 7 files changed, 999 insertions(+)
 create mode 100644 shared/data.c
 create mode 100644 shared/data.h
 create mode 100644 shared/lk/fls.h
 create mode 100644 shared/lk/log2.h

diff --git a/cli/debugfs.c b/cli/debugfs.c
index 82b2724..6fb21a2 100644
--- a/cli/debugfs.c
+++ b/cli/debugfs.c
@@ -17,6 +17,7 @@
 #include "shared/lk/types.h"
 #include "shared/lk/xattr.h"
 
+#include "shared/data.h"
 #include "shared/dir.h"
 #include "shared/format-block.h"
 #include "shared/inode.h"
@@ -357,6 +358,105 @@ static void cmd_quit(struct debugfs_context *ctx, int argc, char **argv)
 	return;
 }
 
+static void cmd_read(struct debugfs_context *ctx, int argc, char **argv)
+{
+	struct ngnfs_dir_lookup_entry lent;
+	char *filename;
+	char *buf;
+	u64 offset, read_size;
+	u64 todo, done, zeroes, poisoned, garbage;
+	u64 i;
+	int buf_size = 4 * NGNFS_DATA_MAX_IO; /* allow multiple txn's per call */
+	int ret;
+
+	if (argc != 4) {
+		printf("usage: read <filename> <offset> <length>\n");
+		return;
+	}
+
+	filename = argv[1];
+
+	ret = strtoull_nerr(&offset, argv[2], NULL, 0);
+	if (ret < 0) {
+		print_err("parsing offset", ret);
+		return;
+	}
+
+	ret = parse_ull(&read_size, argv[3], 0, SIZE_MAX);
+	if (ret < 0)
+		return;
+
+	if (read_size < buf_size)
+		buf_size = read_size;
+
+	buf = malloc(buf_size);
+	if (!buf) {
+		printf("malloc error");
+		return;
+	}
+
+	ret = ngnfs_dir_lookup(ctx->nfi, &ctx->cwd_ig, filename, strlen(filename), &lent);
+	if (ret < 0) {
+		print_err("read", ret);
+		goto out;
+	}
+
+	zeroes = 0;
+	poisoned = 0;
+	garbage = 0;
+
+	for (done = 0; done < read_size; done += ret) {
+		todo = read_size - done;
+		if (todo > buf_size)
+			todo = buf_size;
+
+		memset(buf, 'x', todo); /* poison the buffer */
+
+		ret = ngnfs_data_read(ctx->nfi, &lent.ig, offset, buf, todo);
+		if (ret < 0) {
+			print_err("read", ret);
+			goto out;
+		} else if (ret == 0) {
+			printf("read: EOF\n");
+			break;
+		}
+
+		/* count what kind of bytes we read */
+		for (i = 0; i < ret; i++) {
+			switch (buf[i]) {
+			case '.':
+				break;
+			case '\0':
+				zeroes++;
+				break;
+			case 'x':
+				if (poisoned == 0)
+					printf("error: poisoned byte found at %llu\n", i + done);
+				poisoned++;
+				break;
+			default:
+				if (garbage == 0)
+					printf("error: garbage byte %c found at %llu\n",
+					       buf[i], i + done);
+				garbage++;
+				break;
+			}
+		}
+		offset += ret;
+	}
+
+	if (done < read_size)
+		printf("short read: %llu bytes of %llu requested\n", done, read_size);
+
+	if (done > read_size)
+		printf("error: read too many bytes: %llu of %llu requested\n", done, read_size);
+
+	printf("read %llu bytes (%llu zero bytes %llu poisoned bytes %llu garbage bytes)\n",
+	       done, zeroes, poisoned, garbage);
+out:
+	free(buf);
+}
+
 static void cmd_readdir(struct debugfs_context *ctx, int argc, char **argv)
 {
 	struct ngnfs_readdir_entry *buf;
@@ -604,6 +704,78 @@ static void cmd_unlink(struct debugfs_context *ctx, int argc, char **argv)
 		print_err("unlink", ret);
 }
 
+static void cmd_write(struct debugfs_context *ctx, int argc, char **argv)
+{
+	struct ngnfs_dir_lookup_entry lent;
+	char *filename;
+	char *buf;
+	u64 offset;
+	u64 write_size;
+	u64 buf_size = 4 * NGNFS_DATA_MAX_IO; /* allow multiple txn's per call */
+	u64 todo, done;
+	int ret;
+
+	if (argc != 4) {
+		printf("usage: write <filename> <offset> <length>\n");
+		return;
+	}
+
+	filename = argv[1];
+
+	ret = strtoull_nerr(&offset, argv[2], NULL, 0);
+	if (ret < 0) {
+		print_err("parsing offset", ret);
+		return;
+	}
+
+	ret = parse_ull(&write_size, argv[3], 0, SIZE_MAX);
+	if (ret < 0)
+		return;
+
+	if (write_size < buf_size)
+		buf_size = write_size;
+
+	buf = malloc(buf_size);
+	if (!buf) {
+		printf("malloc error");
+		return;
+	}
+	memset(buf, '.', buf_size);
+
+	ret = ngnfs_dir_lookup(ctx->nfi, &ctx->cwd_ig, filename, strlen(filename), &lent);
+	if (ret < 0) {
+		print_err("write", ret);
+		goto out;
+	}
+
+	for (done = 0; done < write_size; done += ret) {
+		todo = write_size - done;
+		if (todo > buf_size)
+			todo = buf_size;
+
+		ret = ngnfs_data_write(ctx->nfi, &lent.ig, offset, buf, todo);
+
+		if (ret < 0) {
+			print_err("write", ret);
+			goto out;
+		}
+
+		if (ret == 0)
+			break;
+
+		offset += ret;
+	}
+
+	if (done < write_size)
+		printf("short write: %llu of %llu bytes requested\n", done, buf_size);
+	else if (done > write_size)
+		printf("error: wrote too many bytes: %llu of %llu requested\n", done, buf_size);
+	else
+		printf("wrote %llu bytes\n", done);
+out:
+	free(buf);
+}
+
 static struct command {
 	char *name;
 	void (*func)(struct debugfs_context *ctx, int argc, char **argv);
@@ -618,6 +790,7 @@ static struct command {
 	{ "mkdir", cmd_mkdir, },
 	{ "mkfs", cmd_mkfs, },
 	{ "quit", cmd_quit, },
+	{ "read", cmd_read, },
 	{ "readdir", cmd_readdir, },
 	{ "removexattr", cmd_removexattr, },
 	{ "rename", cmd_rename, },
@@ -626,6 +799,7 @@ static struct command {
 	{ "stat", cmd_stat, },
 	{ "sync", cmd_sync, },
 	{ "unlink", cmd_unlink, },
+	{ "write", cmd_write, },
 };
 
 static int compar_cmd_names(const void *A, const void *B)
diff --git a/shared/data.c b/shared/data.c
new file mode 100644
index 0000000..145abe6
--- /dev/null
+++ b/shared/data.c
@@ -0,0 +1,445 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "shared/lk/bug.h"
+#include "shared/lk/byteorder.h"
+#include "shared/lk/math64.h"
+#include "shared/lk/types.h"
+
+#include "shared/block.h"
+#include "shared/data.h"
+#include "shared/format-block.h"
+#include "shared/inode.h"
+#include "shared/txn.h"
+
+/*
+ * File data is stored in simple tree of indirect blocks with data all
+ * at the same level of the tree. The topmost block in the tree and its
+ * level are stored in the inode. The tree is sparse and branches are
+ * grown as necessary to index newly written data blocks. While
+ * manipulating the tree, we use levels to identify the blocks at
+ * various levels of the tree, with the highest levels closer to the
+ * root.
+ *
+ * The data root field in the inode contains both the persistent
+ * reference (block number, etc.) to the root block of the tree, plus
+ * the height of the tree (the level of the block it points to, plus 1).
+ * The root block reference is not part of an indirect block.
+ *
+ * The level of a block is:
+ *
+ * 0 = data block
+ * 1 = references to data blocks
+ * 2 = references to single indirect blocks (pointing to data blocks)
+ * 3 = references to double indirect blocks
+ * 4 = references to triple indirect blocks
+ *
+ * Thus a data root reference with height 1 points to a block of level 0
+ * = a single block of data at logical file offset 0.
+ */
+
+/*
+ * Return the logical block number containing offset within a file.
+ */
+static u64 dblk_from_offset(u64 offset)
+{
+	return offset >> NGNFS_BLOCK_SHIFT;
+}
+
+/*
+ * Return the offset past the beginning of a block.
+ */
+static u64 offset_in_blk(u64 offset)
+{
+	return offset & NGNFS_BLOCK_MASK;
+}
+
+/*
+ * Calculate the index of the block reference for this logical block
+ * within an indirect block at this level (1 = pointers to data blocks).
+ */
+static u32 calc_ref_ind(u64 dblk, int level)
+{
+	u32 ind;
+	int i;
+
+	BUG_ON(level < 1);
+
+	for (i = 1; i < level; i++)
+		dblk >>= NGNFS_DATA_REFS_PER_BLK_SHIFT;
+
+	ind = dblk & (NGNFS_DATA_REFS_PER_BLK - 1ULL);
+
+	return ind;
+}
+
+/*
+ * Calculate the height of the tree (level of the root pointer) needed
+ * to index the logical block dblk in this file.
+ */
+static u8 height_from_dblk(u64 dblk)
+{
+	u8 height = 2;
+
+	if (dblk == 0)
+		return 1;
+
+	while (dblk >>= NGNFS_DATA_REFS_PER_BLK_SHIFT)
+		height++;
+
+	return height;
+}
+
+static int alloc_block(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+		       struct ngnfs_txn_block *parent_tblk, struct ngnfs_block_ref *parent_ref,
+		       struct ngnfs_txn_block **tblk_ret, void **data_ret)
+{
+	struct ngnfs_block_ref ref;
+	u64 bnr;
+	int ret;
+
+	ret = ngnfs_txn_alloc_meta(txn, &bnr);
+	if (ret < 0)
+		goto out;
+
+	ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_WRITE | NBF_NEW, tblk_ret, data_ret);
+	if (ret < 0)
+		goto out;
+
+	ref.bnr = cpu_to_le64(bnr);
+	ref.alloc_counter = 0; /* XXX */
+	ngnfs_tblk_assign(parent_tblk, *parent_ref, ref);
+out:
+	return ret;
+}
+
+static int grow_height(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+		       struct ngnfs_inode_txn_ref *ino, int height)
+{
+	struct ngnfs_txn_block *parent_tblk;
+	struct ngnfs_block_ref *parent_ref;
+	struct ngnfs_indirect_block *iblk;
+	struct ngnfs_txn_block *tblk;
+	struct ngnfs_data_root *dr;
+	struct ngnfs_block_ref ref;
+	u8 level;
+	int ret;
+
+	dr = &ino->ninode->data;
+
+	ret = 0;
+	if ((dr->height > 0) &&
+	    (height > dr->height)) {
+		/* start at root and fill in till we get to existing tree */
+		level = height;
+		ref = dr->ref; /* save current root of tree */
+		parent_tblk = ino->tblk;
+		parent_ref = &dr->ref;
+
+		while (level-- > dr->height) {
+			ret = alloc_block(nfi, txn, parent_tblk, parent_ref,
+					  &tblk, (void **) &iblk);
+			if (ret < 0)
+				goto out;
+			/* growing height will always index old data to 0 */
+			parent_tblk = tblk;
+			parent_ref = &iblk->refs[0];
+		}
+
+		/* now put existing tree in indirect block */
+		ngnfs_tblk_assign(tblk, iblk->refs[0], ref);
+		ngnfs_tblk_assign(ino->tblk, dr->height, height);
+	}
+out:
+	return ret;
+}
+
+/*
+ * Allocate the indirect blocks for file data offset if they aren't
+ * already, and return the parent block of the data at offset. This is
+ * only called after checking that a read is from a valid range of the
+ * file. If a read hits an unallocated range, this returns 0 for the
+ * number of refs and the caller will zero fill the buffer.
+ */
+static int get_data_block_parent(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+				 struct ngnfs_inode_txn_ref *ino, u64 offset, int write,
+				 struct ngnfs_txn_block **tblkp,
+				 struct ngnfs_block_ref **refs, int *nr)
+{
+	struct ngnfs_txn_block *parent_tblk;
+	struct ngnfs_block_ref *parent_ref;
+	struct ngnfs_indirect_block *iblk;
+	struct ngnfs_txn_block *tblk;
+	struct ngnfs_data_root *dr;
+	u64 dblk;
+	u64 bnr;
+	u8 height;
+	u8 level;
+	u8 ind;
+	int ret;
+
+	dr = &ino->ninode->data;
+	dblk = dblk_from_offset(offset);
+	height = height_from_dblk(dblk);
+
+	/* special case: dblk 0 and tree with 0 or 1 blocks in it */
+	if ((height == 1) &&
+	    (dr->height <= 1))  {
+		if (dr->height == 0) {
+			ret = alloc_block(nfi, txn, ino->tblk, &dr->ref, NULL, NULL);
+			if (ret < 0)
+				goto out;
+			ngnfs_tblk_assign(ino->tblk, dr->height, height);
+		}
+
+		*tblkp = ino->tblk;
+		*refs = &dr->ref;
+		*nr = 1;
+		ret = 0;
+		goto out;
+	}
+
+	/* grow the height of any existing data */
+	ret = grow_height(nfi, txn, ino, height);
+	if (ret < 0)
+		goto out;
+
+	/* two cases: totally empty tree and tree of necessary height */
+	level = dr->height ? dr->height : height;
+	bnr = le64_to_cpu(dr->ref.bnr);
+	parent_tblk = ino->tblk;
+	parent_ref = &dr->ref;
+
+	/* may still have totally empty tree */
+	while (level-- > 1) {
+		if (bnr) {
+			ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_READ, &tblk, (void **) &iblk);
+			if (ret < 0)
+				goto out;
+
+		} else {
+			if (!write) {
+				*tblkp = NULL;
+				*refs = NULL;
+				*nr = NGNFS_DATA_REFS_PER_BLK - calc_ref_ind(dblk, 1);
+			}
+
+			/* get write access to update parent with new block */
+			if (parent_ref) {
+				ret = ngnfs_txn_get_block(nfi, txn, le64_to_cpu(parent_ref->bnr),
+							  NBF_WRITE, NULL, NULL);
+				if (ret < 0)
+					goto out;
+			}
+
+			ret = alloc_block(nfi, txn, parent_tblk, parent_ref,
+					  &tblk, (void **) &iblk);
+			if (ret < 0)
+				goto out;
+		}
+
+		if (level == 0)
+			break;
+
+		/* look up next block reference */
+		ind = calc_ref_ind(dblk, level);
+		bnr = le64_to_cpu(iblk->refs[ind].bnr);
+
+		parent_tblk = tblk;
+		parent_ref = &iblk->refs[ind];
+	};
+
+	if (dr->height < height)
+		ngnfs_tblk_assign(ino->tblk, dr->height, height);
+
+	*tblkp = tblk;
+	ind = calc_ref_ind(dblk, 1);
+	*refs = &iblk->refs[ind];
+	*nr = NGNFS_DATA_REFS_PER_BLK - ind;
+out:
+	return ret;
+}
+
+static int read_write_block(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+			    struct ngnfs_txn_block *parent_tblk,
+			    struct ngnfs_block_ref *parent_refs, int ind,
+			    int start, void *buf, int len, int write)
+{
+	struct ngnfs_txn_block *tblk;
+	char *data;
+	u64 bnr;
+	nbf_t nbf;
+	int ret;
+
+	BUG_ON(start + len > NGNFS_BLOCK_SIZE);
+
+	if (parent_refs)
+		bnr = le64_to_cpu(parent_refs[ind].bnr);
+	else
+		bnr = 0;
+
+	/* zero fill sparse blocks */
+	if (!write && (bnr == 0)) {
+		memset(buf, 0, len);
+		ret = 0;
+		goto out;
+	}
+
+	if (bnr) {
+		nbf = write ? NBF_WRITE : NBF_READ;
+		ret = ngnfs_txn_get_block(nfi, txn, bnr, nbf, &tblk, (void **) &data);
+		if (ret < 0)
+			goto out;
+
+	} else {
+		ret = alloc_block(nfi, txn, parent_tblk, &parent_refs[ind], &tblk, (void **) &data);
+		if (ret < 0)
+			goto out;
+	}
+
+	if (write)
+		ngnfs_tblk_memcpy(tblk, data + start, buf, len);
+	else
+		memcpy(buf, data + start, len);
+out:
+	return ret;
+}
+
+/*
+ * After reading the inode, trim the requested file data range for a
+ * read request to only cover reads less than the size of the file. If
+ * the range doesn't overlap any allocated part of the file, set len to
+ * 0 and the caller will return no error.
+ */
+static void trim_read_range(struct ngnfs_inode *ino, u64 offset, size_t *len)
+{
+	u64 i_size;
+
+	i_size = le64_to_cpu(ino->size);
+
+	if (offset >= i_size)
+		*len = 0;
+	else if ((offset + *len) >= i_size)
+		*len = i_size - offset;
+}
+
+/*
+ * If file size needs to be updated, then we already have write access
+ * to the inode, so this currently can't fail.
+ */
+static void update_isize(struct ngnfs_inode_txn_ref *itref, u64 size)
+{
+	if (size > le64_to_cpu(itref->ninode->size))
+		ngnfs_tblk_assign(itref->tblk, itref->ninode->size, cpu_to_le64(size));
+}
+
+static int read_write_range(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+			    struct ngnfs_inode_txn_ref *itref, u64 offset, char *buf, size_t len,
+			    int write, size_t *bytes)
+{
+	struct ngnfs_txn_block *tblk;
+	struct ngnfs_block_ref *refs;
+	size_t start, done, todo;
+	int nr;
+	int i;
+	int ret;
+
+	if (!write)
+		trim_read_range(itref->ninode, offset, &len);
+
+	done = 0;
+	start = offset_in_blk(offset);
+
+	while (done < len) {
+		ret = get_data_block_parent(nfi, txn, itref, offset, write, &tblk, &refs, &nr);
+		if (ret < 0)
+			goto out;
+
+		for (i = 0; i < nr; i++) {
+			todo = len - done;
+			if (todo > (NGNFS_BLOCK_SIZE - start))
+				todo = NGNFS_BLOCK_SIZE - start;
+
+			ret = read_write_block(nfi, txn, tblk, refs, i, start,
+					       buf + done, todo, write);
+			if (ret < 0)
+				goto out;
+
+			offset += todo;
+			done += todo;
+			start = 0;
+
+			if (done >= len)
+				break;
+		}
+	}
+
+	*bytes = done;
+	ret = 0;
+out:
+	if (done) {
+		if (write)
+			update_isize(itref, offset);
+		ret = 0; /* any error will be returned on next call */
+	}
+
+	return ret;
+}
+
+static ssize_t read_write_data(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			       u64 offset, void *buf, size_t len, int write)
+{
+	struct ngnfs_transaction txn;
+	struct ngnfs_inode_txn_ref itref;
+	size_t start, bytes, done, todo;
+	nbf_t nbf;
+	int ret;
+
+	bytes = 0;
+	start = offset_in_blk(offset);
+	nbf = write ? NBF_WRITE : NBF_READ;
+
+	ngnfs_txn_init(&txn);
+
+	/* split into appropriate-sized transactions */
+	for (done = 0; done < len; done += bytes) {
+
+		do {
+			todo = len - done;
+			if (todo > (NGNFS_DATA_MAX_IO - start))
+				todo = NGNFS_DATA_MAX_IO - start;
+
+			/* XXX need to prevent other IOs interleaving */
+			ret = ngnfs_inode_get(nfi, &txn, nbf, ig, &itref)			?:
+			      read_write_range(nfi, &txn, &itref, offset, buf + done,
+					       todo, write, &bytes);
+
+		} while (ngnfs_txn_retry(nfi, &txn, &ret));
+
+		ngnfs_txn_teardown(nfi, &txn);
+
+		if (ret < 0)
+			goto out;
+
+		if (bytes == 0)
+			break;
+
+		offset += bytes;
+		start = 0;
+	}
+
+	ret = done;
+out:
+	return ret ?: done;
+}
+
+ssize_t ngnfs_data_write(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			 u64 offset, void *buf, size_t len)
+{
+	return read_write_data(nfi, ig, offset, buf, len, 1);
+}
+
+ssize_t ngnfs_data_read(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			u64 offset, void *buf, size_t len)
+{
+	return read_write_data(nfi, ig, offset, buf, len, 0);
+}
diff --git a/shared/data.h b/shared/data.h
new file mode 100644
index 0000000..e5bcccf
--- /dev/null
+++ b/shared/data.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef NGNFS_SHARED_DATA_H
+#define NGNFS_SHARED_DATA_H
+
+#include "shared/fs_info.h"
+#include "shared/inode.h"
+
+/*
+ * The maximum amount of data we want to read/write in one transaction.
+ */
+#define NGNFS_DATA_MAX_IO (16 * NGNFS_BLOCK_SIZE)
+
+ssize_t ngnfs_data_read(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			u64 offset, void *buf, size_t len);
+ssize_t ngnfs_data_write(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			 u64 offset, void *buf, size_t len);
+
+#endif
diff --git a/shared/format-block.h b/shared/format-block.h
index 5344ffa..b9a65c3 100644
--- a/shared/format-block.h
+++ b/shared/format-block.h
@@ -2,12 +2,15 @@
 #ifndef NGNFS_SHARED_FORMAT_BLOCK_H
 #define NGNFS_SHARED_FORMAT_BLOCK_H
 
+#include "shared/lk/build_bug.h"
 #include "shared/lk/compiler_attributes.h"
 #include "shared/lk/limits.h"
+#include "shared/lk/log2.h"
 #include "shared/lk/types.h"
 
 #define NGNFS_BLOCK_SHIFT	12
 #define NGNFS_BLOCK_SIZE	(1 << NGNFS_BLOCK_SHIFT)
+#define NGNFS_BLOCK_MASK	(NGNFS_BLOCK_SIZE - 1ULL)
 
 struct ngnfs_block_ref {
 	__le64 bnr;
@@ -95,6 +98,37 @@ struct ngnfs_ino_gen {
 	__le64 gen;	/* inode generation, starts at 1 */
 };
 
+/*
+ * Data blocks are pointed to by a simple tree of indirect blocks rooted
+ * in a single field in the inode. The height is one greater than the
+ * level of the referenced block. It's 0 for an empty tree.
+ */
+struct ngnfs_data_root {
+	struct ngnfs_block_ref ref;
+	__u8 height;
+	__u8 _pad[7];
+};
+
+/*
+ * Indirect blocks are a simple array of block refs. We rely on the
+ * number of references per indirect block being a power of 2, so check
+ * that at compile time.
+ */
+#define NGNFS_DATA_REFS_PER_BLK (NGNFS_BLOCK_SIZE / sizeof(struct ngnfs_block_ref))
+BUILD_BUG_ON(NGNFS_DATA_REFS_PER_BLK & (NGNFS_DATA_REFS_PER_BLK - 1));
+
+/*
+ * Because blocks per ref is based on the size of a struct, we can't do
+ * the smart thing and define the shift first and then the value, we
+ * have to go backwards and define the shift from the value instead.
+ */
+
+#define NGNFS_DATA_REFS_PER_BLK_SHIFT const_ilog2(NGNFS_DATA_REFS_PER_BLK)
+
+struct ngnfs_indirect_block {
+	struct ngnfs_block_ref refs[NGNFS_DATA_REFS_PER_BLK];
+};
+
 /*
  * Inodes are stored in inode blocks.  Inode blocks numbers are directly
  * calculated from the inode number.  The block itself is formatted as a
@@ -121,6 +155,7 @@ struct ngnfs_inode {
 	__le64 crtime_nsec;
 	struct ngnfs_btree_root dirents;
 	struct ngnfs_btree_root xattrs;
+	struct ngnfs_data_root data;
 };
 
 #define NGNFS_ROOT_INO 1
diff --git a/shared/inode.c b/shared/inode.c
index 0e4edd2..9f58f8f 100644
--- a/shared/inode.c
+++ b/shared/inode.c
@@ -60,6 +60,7 @@ int ngnfs_inode_init(struct ngnfs_inode_txn_ref *itref, struct ngnfs_inode_ino_g
 	ngnfs_tblk_assign(tblk, ninode->crtime_nsec, ninode->atime_nsec);
 	ngnfs_tblk_memset(tblk, &ninode->dirents, 0, sizeof(ninode->dirents));
 	ngnfs_tblk_memset(tblk, &ninode->xattrs, 0, sizeof(ninode->xattrs));
+	ngnfs_tblk_memset(tblk, &ninode->data, 0, sizeof(ninode->data));
 
 	return 0;
 }
diff --git a/shared/lk/fls.h b/shared/lk/fls.h
new file mode 100644
index 0000000..4b49f67
--- /dev/null
+++ b/shared/lk/fls.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SHARED_LK_FLS_H
+#define _SHARED_LK_FLS_H
+
+#include "shared/lk/bits.h"
+#include "shared/lk/bitops.h"
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned int __fls(unsigned long word)
+{
+	return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __always_inline int fls(unsigned int x)
+{
+	return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#if BITS_PER_LONG == 32
+static __always_inline int fls64(__u64 x)
+{
+	__u32 h = x >> 32;
+	if (h)
+		return fls(h) + 32;
+	return fls(x);
+}
+#elif BITS_PER_LONG == 64
+static __always_inline int fls64(__u64 x)
+{
+	if (x == 0)
+		return 0;
+	return __fls(x) + 1;
+}
+#else
+#error BITS_PER_LONG not 32 or 64
+#endif
+
+static inline unsigned int fls_long(unsigned long l)
+{
+	if (sizeof(l) == 4)
+		return fls(l);
+	return fls64(l);
+}
+
+#endif
diff --git a/shared/lk/log2.h b/shared/lk/log2.h
new file mode 100644
index 0000000..2aa7297
--- /dev/null
+++ b/shared/lk/log2.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells at redhat.com)
+ */
+
+#ifndef _SHARED_LK_LOG2_H
+#define _SHARED_LK_LOG2_H
+
+#include "shared/lk/fls.h"
+#include "shared/lk/types.h"
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ *   more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+#ifndef CONFIG_ARCH_HAS_ILOG2_U32
+static __always_inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+	return fls(n) - 1;
+}
+#endif
+
+#ifndef CONFIG_ARCH_HAS_ILOG2_U64
+static __always_inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+	return fls64(n) - 1;
+}
+#endif
+
+/**
+ * is_power_of_2() - check if a value is a power of two
+ * @n: the value to check
+ *
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ * Return: true if @n is a power of 2, otherwise false.
+ */
+static __always_inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+	return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+/**
+ * __roundup_pow_of_two() - round up to nearest power of two
+ * @n: value to round up
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+	return 1UL << fls_long(n - 1);
+}
+
+/**
+ * __rounddown_pow_of_two() - round down to nearest power of two
+ * @n: value to round down
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+	return 1UL << (fls_long(n) - 1);
+}
+
+/**
+ * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
+ * @n: parameter
+ *
+ * Use this where sparse expects a true constant expression, e.g. for array
+ * indices.
+ */
+#define const_ilog2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		(n) < 2 ? 0 :			\
+		(n) & (1ULL << 63) ? 63 :	\
+		(n) & (1ULL << 62) ? 62 :	\
+		(n) & (1ULL << 61) ? 61 :	\
+		(n) & (1ULL << 60) ? 60 :	\
+		(n) & (1ULL << 59) ? 59 :	\
+		(n) & (1ULL << 58) ? 58 :	\
+		(n) & (1ULL << 57) ? 57 :	\
+		(n) & (1ULL << 56) ? 56 :	\
+		(n) & (1ULL << 55) ? 55 :	\
+		(n) & (1ULL << 54) ? 54 :	\
+		(n) & (1ULL << 53) ? 53 :	\
+		(n) & (1ULL << 52) ? 52 :	\
+		(n) & (1ULL << 51) ? 51 :	\
+		(n) & (1ULL << 50) ? 50 :	\
+		(n) & (1ULL << 49) ? 49 :	\
+		(n) & (1ULL << 48) ? 48 :	\
+		(n) & (1ULL << 47) ? 47 :	\
+		(n) & (1ULL << 46) ? 46 :	\
+		(n) & (1ULL << 45) ? 45 :	\
+		(n) & (1ULL << 44) ? 44 :	\
+		(n) & (1ULL << 43) ? 43 :	\
+		(n) & (1ULL << 42) ? 42 :	\
+		(n) & (1ULL << 41) ? 41 :	\
+		(n) & (1ULL << 40) ? 40 :	\
+		(n) & (1ULL << 39) ? 39 :	\
+		(n) & (1ULL << 38) ? 38 :	\
+		(n) & (1ULL << 37) ? 37 :	\
+		(n) & (1ULL << 36) ? 36 :	\
+		(n) & (1ULL << 35) ? 35 :	\
+		(n) & (1ULL << 34) ? 34 :	\
+		(n) & (1ULL << 33) ? 33 :	\
+		(n) & (1ULL << 32) ? 32 :	\
+		(n) & (1ULL << 31) ? 31 :	\
+		(n) & (1ULL << 30) ? 30 :	\
+		(n) & (1ULL << 29) ? 29 :	\
+		(n) & (1ULL << 28) ? 28 :	\
+		(n) & (1ULL << 27) ? 27 :	\
+		(n) & (1ULL << 26) ? 26 :	\
+		(n) & (1ULL << 25) ? 25 :	\
+		(n) & (1ULL << 24) ? 24 :	\
+		(n) & (1ULL << 23) ? 23 :	\
+		(n) & (1ULL << 22) ? 22 :	\
+		(n) & (1ULL << 21) ? 21 :	\
+		(n) & (1ULL << 20) ? 20 :	\
+		(n) & (1ULL << 19) ? 19 :	\
+		(n) & (1ULL << 18) ? 18 :	\
+		(n) & (1ULL << 17) ? 17 :	\
+		(n) & (1ULL << 16) ? 16 :	\
+		(n) & (1ULL << 15) ? 15 :	\
+		(n) & (1ULL << 14) ? 14 :	\
+		(n) & (1ULL << 13) ? 13 :	\
+		(n) & (1ULL << 12) ? 12 :	\
+		(n) & (1ULL << 11) ? 11 :	\
+		(n) & (1ULL << 10) ? 10 :	\
+		(n) & (1ULL <<  9) ?  9 :	\
+		(n) & (1ULL <<  8) ?  8 :	\
+		(n) & (1ULL <<  7) ?  7 :	\
+		(n) & (1ULL <<  6) ?  6 :	\
+		(n) & (1ULL <<  5) ?  5 :	\
+		(n) & (1ULL <<  4) ?  4 :	\
+		(n) & (1ULL <<  3) ?  3 :	\
+		(n) & (1ULL <<  2) ?  2 :	\
+		1) :				\
+	-1)
+
+/**
+ * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
+ * @n: parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+	__builtin_constant_p(n) ?	\
+	((n) < 2 ? 0 :			\
+	 63 - __builtin_clzll(n)) :	\
+	(sizeof(n) <= 4) ?		\
+	__ilog2_u32(n) :		\
+	__ilog2_u64(n)			\
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n: parameter
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n)			\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 1) ? 1 :		\
+		(1UL << (ilog2((n) - 1) + 1))	\
+				   ) :		\
+	__roundup_pow_of_two(n)			\
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n: parameter
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n)			\
+(						\
+	__builtin_constant_p(n) ? (		\
+		(1UL << ilog2(n))) :		\
+	__rounddown_pow_of_two(n)		\
+ )
+
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+	return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
+
+/**
+ * order_base_2 - calculate the (rounded up) base 2 order of the argument
+ * @n: parameter
+ *
+ * The first few values calculated by this routine:
+ *  ob2(0) = 0
+ *  ob2(1) = 0
+ *  ob2(2) = 1
+ *  ob2(3) = 2
+ *  ob2(4) = 2
+ *  ob2(5) = 3
+ *  ... and so on.
+ */
+#define order_base_2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 0 || (n) == 1) ? 0 :	\
+		ilog2((n) - 1) + 1) :		\
+	__order_base_2(n)			\
+)
+
+static inline __attribute__((const))
+int __bits_per(unsigned long n)
+{
+	if (n < 2)
+		return 1;
+	if (is_power_of_2(n))
+		return order_base_2(n) + 1;
+	return order_base_2(n);
+}
+
+/**
+ * bits_per - calculate the number of bits required for the argument
+ * @n: parameter
+ *
+ * This is constant-capable and can be used for compile time
+ * initializations, e.g bitfields.
+ *
+ * The first few values calculated by this routine:
+ * bf(0) = 1
+ * bf(1) = 1
+ * bf(2) = 2
+ * bf(3) = 2
+ * bf(4) = 3
+ * ... and so on.
+ */
+#define bits_per(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 0 || (n) == 1)		\
+			? 1 : ilog2(n) + 1	\
+	) :					\
+	__bits_per(n)				\
+)
+#endif /* _LINUX_LOG2_H */
-- 
2.49.0




More information about the ngnfs-devel mailing list