[PATCH 22/23] Add file data and debugfs read/write commands

Valerie Aurora val at versity.com
Fri Apr 4 11:45:38 PDT 2025


Implement ngnfs_data_read() and ngnfs_data_write() and corresponding
debugfs commands.

Co-authored-by: Zach Brown <zab at zabbo.net>
Signed-off-by: Valerie Aurora <val at versity.com>
---
 cli/debugfs.c         | 110 ++++++++++
 shared/data.c         | 495 ++++++++++++++++++++++++++++++++++++++++++
 shared/data.h         |  13 ++
 shared/format-block.h |  21 ++
 shared/inode.c        |   1 +
 5 files changed, 640 insertions(+)
 create mode 100644 shared/data.c
 create mode 100644 shared/data.h

diff --git a/cli/debugfs.c b/cli/debugfs.c
index 3097b21..524780c 100644
--- a/cli/debugfs.c
+++ b/cli/debugfs.c
@@ -17,6 +17,7 @@
 #include "shared/lk/types.h"
 #include "shared/lk/xattr.h"
 
+#include "shared/data.h"
 #include "shared/dir.h"
 #include "shared/format-block.h"
 #include "shared/inode.h"
@@ -357,6 +358,61 @@ static void cmd_quit(struct debugfs_context *ctx, int argc, char **argv)
 	return;
 }
 
+/* debugfs "read": read <length> bytes at <offset> of <filename> (looked up in the cwd) and print them. */
+static void cmd_read(struct debugfs_context *ctx, int argc, char **argv)
+{
+	struct ngnfs_dir_lookup_entry lent;
+	char *filename;
+	char *buf;
+	u64 offset, buf_size;
+	ssize_t bytes;
+	int ret;
+
+	if (argc != 4) {
+		printf("usage: read <filename> <offset> <length>\n");
+		return;
+	}
+
+	filename = argv[1];
+
+	ret = strtoull_nerr(&offset, argv[2], NULL, 0);
+	if (ret < 0) {
+		print_err("parsing offset", ret);
+		return;
+	}
+
+	ret = parse_ull(&buf_size, argv[3], 0, SIZE_MAX);
+	if (ret < 0)
+		return;
+
+	buf = malloc(buf_size);
+	if (!buf) {
+		printf("malloc error\n");
+		return;
+	}
+
+	ret = ngnfs_dir_lookup(ctx->nfi, &ctx->cwd_ig, filename, strlen(filename), &lent);
+	if (ret < 0) {
+		print_err("read", ret);
+		goto out;
+	}
+
+	bytes = ngnfs_data_read(ctx->nfi, &lent.ig, offset, buf, buf_size);
+	if (bytes < 0) {
+		print_err("read", (int) bytes);	/* the error is in bytes; ret is 0 after the lookup */
+		goto out;
+	} else if (bytes == 0) {
+		printf("read: EOF\n");
+	} else if ((u64) bytes < buf_size) {
+		printf("short read: %zd of %llu bytes requested\n", bytes, (unsigned long long) buf_size);
+		goto out;
+	} else {
+		printf("%.*s\n", (int) bytes, buf);
+	}
+out:
+	free(buf);
+}
+
 static void cmd_readdir(struct debugfs_context *ctx, int argc, char **argv)
 {
 	struct ngnfs_readdir_entry *buf;
@@ -606,6 +662,58 @@ static void cmd_unlink(struct debugfs_context *ctx, int argc, char **argv)
 		print_err("unlink", ret);
 }
 
+/* debugfs "write": write <length> '.' bytes at <offset> of <filename> (looked up in the cwd). */
+static void cmd_write(struct debugfs_context *ctx, int argc, char **argv)
+{
+	struct ngnfs_dir_lookup_entry lent;
+	char *filename;
+	char *buf;
+	u64 offset, buf_size;
+	ssize_t bytes;
+	int ret;
+
+	if (argc != 4) {
+		printf("usage: write <filename> <offset> <length>\n");
+		return;
+	}
+
+	filename = argv[1];
+
+	ret = strtoull_nerr(&offset, argv[2], NULL, 0);
+	if (ret < 0) {
+		print_err("parsing offset", ret);
+		return;
+	}
+
+	ret = parse_ull(&buf_size, argv[3], 0, SIZE_MAX);
+	if (ret < 0)
+		return;
+
+	buf = malloc(buf_size);
+	if (!buf) {
+		printf("malloc error\n");
+		return;
+	}
+	memset(buf, '.', buf_size);	/* the payload is always a run of '.' characters */
+
+	ret = ngnfs_dir_lookup(ctx->nfi, &ctx->cwd_ig, filename, strlen(filename), &lent);
+	if (ret < 0) {
+		print_err("write", ret);
+		goto out;
+	}
+
+	bytes = ngnfs_data_write(ctx->nfi, &lent.ig, offset, buf, buf_size);
+	if (bytes < 0) {
+		print_err("write", (int) bytes);	/* the error is in bytes; ret is 0 after the lookup */
+		goto out;
+	} else if ((u64) bytes < buf_size) {
+		printf("short write: %zd of %llu bytes requested\n", bytes, (unsigned long long) buf_size);
+		goto out;
+	}
+out:
+	free(buf);
+}
+
 static struct command {
 	char *name;
 	void (*func)(struct debugfs_context *ctx, int argc, char **argv);
@@ -620,6 +728,7 @@ static struct command {
 	{ "mkdir", cmd_mkdir, },
 	{ "mkfs", cmd_mkfs, },
 	{ "quit", cmd_quit, },
+	{ "read", cmd_read, },
 	{ "readdir", cmd_readdir, },
 	{ "removexattr", cmd_removexattr, },
 	{ "rename", cmd_rename, },
@@ -628,6 +737,7 @@ static struct command {
 	{ "stat", cmd_stat, },
 	{ "sync", cmd_sync, },
 	{ "unlink", cmd_unlink, },
+	{ "write", cmd_write, },
 };
 
 static int compar_cmd_names(const void *A, const void *B)
diff --git a/shared/data.c b/shared/data.c
new file mode 100644
index 0000000..8c0a2bb
--- /dev/null
+++ b/shared/data.c
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "shared/lk/bug.h"
+#include "shared/lk/byteorder.h"
+#include "shared/lk/errno.h"
+#include "shared/lk/math64.h"
+#include "shared/lk/types.h"
+
+#include "shared/block.h"
+#include "shared/data.h"
+#include "shared/format-block.h"
+#include "shared/inode.h"
+#include "shared/txn.h"
+
+/*
+ * File data is stored in a simple tree of indirect blocks with data all
+ * at the same level of the tree. The topmost block in the tree and its
+ * level are stored in the inode. The tree is sparse and branches are
+ * grown as necessary to index newly written data blocks. While
+ * manipulating the tree, we use levels to identify the blocks at
+ * various levels of the tree, with the highest levels closer to the
+ * root.
+ *
+ * The data root field in the inode contains both the persistent
+ * reference (block number, etc.) to the root block of the tree, plus
+ * the height of the tree (the level of the block it points to, plus 1).
+ * The root block reference is not part of an indirect block.
+ *
+ * The level of a block is:
+ *
+ * 0 = data block
+ * 1 = single indirect block: references to data blocks
+ * 2 = double indirect block: references to single indirect blocks
+ * 3 = triple indirect block: references to double indirect blocks
+ * 4 = quadruple indirect block: references to triple indirect blocks
+ *
+ * Thus a data root reference with height 1 points to a block of level 0
+ * = a single block of data at logical file offset 0.
+ *
+ * The contents of file data and indirect blocks are read/written using
+ * structs containing both the transaction block reference and a pointer
+ * to the contents in memory.
+ */
+struct data_txn_ref {
+	struct ngnfs_txn_block *tblk;	/* transaction block reference for the data block */
+	void *buf;			/* the data block's contents in memory */
+};
+
+struct indirect_txn_ref {
+	struct ngnfs_txn_block *tblk;		/* transaction block reference for the indirect block */
+	struct ngnfs_indirect_block *iblk;	/* the indirect block's contents in memory */
+};
+
+/*
+ * Return the logical block number (offset >> NGNFS_BLOCK_SHIFT) containing byte offset in a file.
+ */
+static u64 dblk_from_offset(u64 offset)
+{
+	return offset >> NGNFS_BLOCK_SHIFT;
+}
+
+/*
+ * Return how far offset lies past the start of its block (the mask needs a power-of-two block size).
+ */
+static u64 offset_in_blk(u64 offset)
+{
+	return offset & (NGNFS_BLOCK_SIZE - 1ULL);
+}
+
+/*
+ * Return the index of the block reference that leads to logical block dblk
+ * within an indirect block at this level (1 = pointers to data blocks).
+ */
+static u32 calc_ref_ind(u64 dblk, int level)
+{
+	u32 ind;
+	int i;
+
+	BUG_ON(level < 1);	/* level 0 is a data block and holds no references */
+
+	for (i = 1; i <= level; i++)	/* one divide per level; the last remainder is the index */
+		dblk = div_u64_rem(dblk, NGNFS_DATA_REFS_PER_BLOCK, &ind);
+
+	return ind;
+}
+
+/*
+ * Calculate the height of the tree (level of the root pointer) needed
+ * to index the logical block dblk in this file.
+ */
+static u8 height_from_dblk(u64 dblk)
+{
+	u64 total = NGNFS_DATA_REFS_PER_BLOCK;	/* data blocks indexable at the current height */
+	u8 height = 2;				/* any dblk > 0 needs at least one indirect block */
+
+	if (dblk == 0)
+		return 1;	/* the root reference points straight at the data block */
+
+	while (dblk >= total) {
+		height++;
+		total *= NGNFS_DATA_REFS_PER_BLOCK;	/* each extra level multiplies the reach */
+	}
+
+	return height;
+}
+
+/*
+ * Information for finding and allocating file data and the indirect
+ * blocks along the way to a logical file block.
+ */
+struct data_path_args {
+	struct ngnfs_inode_txn_ref ino;	/* inode whose data tree is walked */
+	struct data_txn_ref data;	/* data block found or allocated for dblk */
+	u64 offset;			/* current byte offset; advanced as the range is processed */
+	u64 dblk;			/* logical block number containing offset */
+	void *buf;			/* caller's buffer */
+	size_t len;			/* bytes requested; reads are trimmed to the file size */
+	int write;			/* nonzero for a write, zero for a read */
+	nbf_t nbf; 	/* initial read/write mode for inode and data block */
+	u8 level; 	/* tree height needed to index dblk, from height_from_dblk() */
+};
+
+static void init_data_path_args(struct data_path_args *da, u64 offset, void *buf, size_t len,
+				int write)
+{
+	da->ino.ninode = NULL;	/* inode and data references start out empty */
+	da->ino.tblk = NULL;
+	da->data.tblk = NULL;
+	da->data.buf = NULL;
+	da->offset = offset;
+	da->dblk = dblk_from_offset(offset);	/* derived from offset */
+	da->buf = buf;
+	da->len = len;
+	da->write = write;
+	da->nbf = write ? NBF_WRITE : NBF_READ;	/* access mode follows the operation */
+	da->level = height_from_dblk(da->dblk);	/* tree height needed to reach dblk */
+}
+
+static int alloc_block(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+		       struct ngnfs_block_ref *ref, struct ngnfs_txn_block **tblk_ret,
+		       void **data_ret)
+{
+	u64 bnr;
+	int ret;
+
+	ret = ngnfs_txn_alloc_meta(txn, &bnr);	/* obtain a block number for the new block */
+	if (ret < 0)
+		goto out;
+
+	ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_WRITE | NBF_NEW, tblk_ret, data_ret);
+	if (ret < 0)
+		goto out;
+
+	ref->bnr = cpu_to_le64(bnr);	/* ref is only filled in once both calls succeeded */
+	ref->alloc_counter = 0; /* XXX */
+out:
+	return ret;
+}
+
+/*
+ * Fill in a file data branch for logical file block dblk from
+ * start_level to child_level. If child doesn't exist, alloc a data
+ * block. If child does exist, graft it in. Return the top of the
+ * branch, and the data block if we allocated it.
+ */
+static int fill_block_path(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+			   u64 dblk, u8 start_level, u8 child_level,
+			   struct ngnfs_block_ref *child, struct ngnfs_block_ref *branch_ret,
+			   struct data_txn_ref *data_ret)
+{
+	struct indirect_txn_ref parent;
+	struct ngnfs_block_ref branch;
+	struct ngnfs_block_ref ref;
+	struct ngnfs_txn_block *tblk = NULL;
+	void *buf = NULL;
+	u8 level;
+	u32 ind;	/* matches calc_ref_ind(); a u8 would truncate indexes above 255 */
+	int ret;
+
+	/* at least one block must be allocated or tblk/buf/parent below are never set */
+	BUG_ON(start_level < 1 || start_level <= child_level);
+
+	branch.bnr = 0;
+	branch.alloc_counter = 0;
+
+	parent.tblk = NULL;
+	parent.iblk = NULL;
+
+	level = start_level;
+	while (level > child_level) {
+		ret = alloc_block(nfi, txn, &ref, &tblk, &buf);
+		if (ret < 0)
+			goto out;
+
+		if (!branch.bnr)	/* the first block allocated is the top of the branch */
+			branch = ref;
+
+		if (parent.tblk) {	/* link the new block into the one allocated just before it */
+			ind = calc_ref_ind(dblk, level);
+			ngnfs_tblk_assign(parent.tblk, parent.iblk->refs[ind], ref);
+		}
+		parent.tblk = tblk;
+		parent.iblk = buf;
+		level--;
+	}
+
+	if (child_level == 0) {	/* the last block allocated is the data block itself */
+		data_ret->tblk = tblk;
+		data_ret->buf = buf;
+	} else {		/* hang the existing child under the lowest new block */
+		ind = calc_ref_ind(dblk, child_level);
+		ngnfs_tblk_assign(parent.tblk, parent.iblk->refs[ind], *child);
+	}
+
+	*branch_ret = branch;
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Grow the height of the tree to da->level, the height needed to index
+ * da->dblk. If the tree is empty, allocate a data block and return it
+ * in da->data.
+ */
+static int grow_height(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+		       struct data_path_args *da)
+{
+	struct ngnfs_inode_txn_ref *ino;
+	struct ngnfs_block_ref new_root;
+	struct ngnfs_data_root *dr;
+	u64 bnr;
+	u64 dblk;
+	int ret;
+
+	ino = &da->ino;
+	bnr = le64_to_cpu(ino->ninode->ig.ino); /* XXX should get out of tblk; NOTE(review): uses ino as the block number */
+	dr = &ino->ninode->data;
+
+	/* attempt to convert read access on the inode block to write access */
+	if (da->nbf != NBF_WRITE) {
+		ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_WRITE, NULL, NULL);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* an existing tree holds dblk 0, so the new levels must index to dblk 0 */
+	if (dr->height != 0)
+		dblk = 0;
+	else
+		dblk = da->dblk;
+
+	ret = fill_block_path(nfi, txn, dblk, da->level, dr->height, &dr->ref, &new_root,
+			      &da->data);
+	if (ret < 0)
+		goto out;
+
+	ngnfs_tblk_assign(ino->tblk, dr->ref, new_root);	/* point the inode at the new root */
+	ngnfs_tblk_assign(ino->tblk, dr->height, da->level);
+out:
+	return ret;
+}
+
+/*
+ * Grow a branch for da->dblk from the parent's level down through the
+ * data block, graft it into the existing tree, and return the data
+ * block in da->data. Cannot be used at the root of the tree.
+ */
+static int grow_branch(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+		       struct indirect_txn_ref *parent, u64 bnr, u8 level,
+		       struct data_path_args *da)
+{
+	struct ngnfs_block_ref branch;
+	u32 ind;	/* matches calc_ref_ind(); a u8 would truncate indexes above 255 */
+	int ret;
+
+	BUG_ON(da->ino.ninode->data.height == level);
+
+	/* we have read access on the parent (block bnr), try for write access */
+	ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_WRITE, NULL, NULL);
+	if (ret < 0)
+		goto out;
+
+	/* goal level is a data block, which is level 0 */
+	ret = fill_block_path(nfi, txn, da->dblk, level, 0, NULL, &branch, &da->data);
+	if (ret < 0)
+		goto out;
+
+	ind = calc_ref_ind(da->dblk, level);
+	ngnfs_tblk_assign(parent->tblk, parent->iblk->refs[ind], branch);
+out:
+	return ret;
+}
+
+/*
+ * Get the block at this logical file data offset and return it in
+ * da->data. If it doesn't exist, allocate it and all indirect blocks on
+ * its path if necessary. This is only called after checking that a read
+ * is from a valid range of the file, so it should always allocate
+ * missing blocks to zero-fill holes.
+ */
+static int get_data_block(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+			  struct data_path_args *da)
+{
+	struct ngnfs_data_root *dr;
+	struct indirect_txn_ref parent;
+	struct ngnfs_txn_block *tblk;
+	void *buf;
+	nbf_t nbf;
+	u64 bnr;
+	u8 level;
+	u32 ind;	/* matches calc_ref_ind(); a u8 would truncate indexes above 255 */
+	int ret;
+
+	dr = &da->ino.ninode->data;
+
+	/*
+	 * Grow height of tree as necessary. If we grow a tree with one
+	 * data block (height = 1), then we also get the data block.
+	 */
+	if (da->level > dr->height) {
+		ret = grow_height(nfi, txn, da);
+		if (ret < 0)
+			goto out;
+
+		if (da->level == 1)
+			goto out;
+	}
+
+	nbf = NBF_READ;		/* indirect blocks are only read while walking down */
+	level = dr->height;
+	bnr = le64_to_cpu(dr->ref.bnr);
+
+	while (level-- > 0) {	/* level is now the level of the block named by bnr */
+		if (level == 0)
+			nbf = da->nbf;
+
+		ret = ngnfs_txn_get_block(nfi, txn, bnr, nbf, &tblk,
+					  (void **) &buf);
+		if (ret < 0)
+			goto out;
+
+		if (level == 0) {
+			da->data.tblk = tblk;
+			da->data.buf = buf;
+			goto out;
+		}
+
+		/* look up next block reference; bnr must keep naming the parent for grow_branch() */
+		parent.tblk = tblk;
+		parent.iblk = buf;
+		ind = calc_ref_ind(da->dblk, level);
+
+		if (parent.iblk->refs[ind].bnr == 0) {	/* hole: build the missing branch */
+			ret = grow_branch(nfi, txn, &parent, bnr, level, da);
+			if (ret < 0)
+				goto out;
+			break;
+		}
+		bnr = le64_to_cpu(parent.iblk->refs[ind].bnr);
+	}
+out:
+	BUG_ON((ret == 0) && (da->data.buf == NULL));
+	return ret;
+}
+
+static int read_write_block(struct data_txn_ref *dtref, size_t offset, void *buf, size_t len,
+			    int write)
+{
+	BUG_ON(offset >= NGNFS_BLOCK_SIZE);	/* offset is relative to the start of the block */
+
+	if (write)	/* writes go through the transaction block helper */
+		ngnfs_tblk_memcpy(dtref->tblk, dtref->buf + offset, buf, len);
+	else		/* reads copy straight out of the block contents */
+		memcpy(buf, dtref->buf + offset, len);
+
+	return 0;	/* always succeeds */
+}
+
+/*
+ * After reading the inode, trim the requested file data range for a
+ * read request to only cover reads less than the size of the file. If
+ * the range doesn't overlap any allocated part of the file, set len to
+ * 0 and the caller will return no error.
+ */
+static void trim_read_range(struct data_path_args *da)
+{
+	u64 i_size;	/* the on-disk size is 64 bits; size_t would truncate on 32-bit */
+
+	i_size = le64_to_cpu(da->ino.ninode->size);
+
+	if (da->offset >= i_size)
+		da->len = 0;
+	else if (da->len > i_size - da->offset)	/* subtract instead of offset + len, which can wrap */
+		da->len = i_size - da->offset;
+}
+
+/*
+ * Extend the inode size to da->offset if da->offset lies beyond it. If the
+ * size needs updating we already have write access to the inode, so this can't fail.
+ */
+static void update_isize(struct data_path_args *da)
+{
+	struct ngnfs_inode_txn_ref *ino = &da->ino;
+
+	if (da->offset > le64_to_cpu(ino->ninode->size))	/* the size never shrinks here */
+		ngnfs_tblk_assign(ino->tblk, ino->ninode->size, cpu_to_le64(da->offset));
+}
+
+static int read_write_range(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+			    struct data_path_args *da, size_t *bytes)
+{
+	size_t start, bytes_done, bytes_todo;
+	int ret;
+
+	if (!da->write)
+		trim_read_range(da);	/* reads never go past the file size */
+
+	bytes_done = 0;
+	start = offset_in_blk(da->offset);	/* only the first block can start mid-block */
+
+	while (bytes_done < da->len) {
+		ret = get_data_block(nfi, txn, da);
+		if (ret < 0)
+			goto out;
+
+		bytes_todo = da->len - bytes_done;
+		if (bytes_todo > (NGNFS_BLOCK_SIZE - start))	/* clamp to the end of this block */
+			bytes_todo = NGNFS_BLOCK_SIZE - start;
+
+		ret = read_write_block(&da->data, start, da->buf + bytes_done, bytes_todo,
+				       da->write);
+		if (ret < 0)
+			goto out;
+
+		bytes_done += bytes_todo;
+		da->offset += bytes_todo;	/* advance to the next block and recompute its path */
+		da->dblk = dblk_from_offset(da->offset);
+		da->level = height_from_dblk(da->dblk);
+		start = 0;
+	}
+
+	ret = 0;
+out:
+	if (bytes_done) {	/* partial progress is reported as success with a short count */
+		*bytes = bytes_done;
+		update_isize(da);	/* da->offset is now just past the last byte transferred */
+		ret = 0; /* any error will be returned on next call */
+	}
+	return ret;
+}
+
+/*
+ * Read or write up to len bytes at offset in inode ig using one transaction.
+ * Returns the bytes transferred (possibly short) or a negative error code.
+ */
+static ssize_t read_write_data(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			       u64 offset, void *buf, size_t len, int write)
+{
+	struct {
+		struct data_path_args da;
+		struct ngnfs_transaction txn;
+	} *op;
+	size_t bytes = 0;
+	int ret;
+
+	op = kmalloc(sizeof(*op), GFP_NOFS);
+	if (!op)
+		return -ENOMEM;
+
+	init_data_path_args(&op->da, offset, buf, len, write);
+	ngnfs_txn_init(&op->txn);
+
+	/* XXX break up into smaller transactions */
+	do {
+		ret = ngnfs_inode_get(nfi, &op->txn, op->da.nbf, ig, &op->da.ino) ?:
+		      read_write_range(nfi, &op->txn, &op->da, &bytes);
+	} while (ngnfs_txn_retry(nfi, &op->txn, &ret));
+
+	ngnfs_txn_teardown(nfi, &op->txn);
+	kfree(op);	/* nothing references op once the transaction is torn down */
+
+	return ret == 0 ? (ssize_t) bytes : ret;	/* don't squeeze the count through int */
+}
+
+ssize_t ngnfs_data_write(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			 u64 offset, void *buf, size_t len)
+{
+	return read_write_data(nfi, ig, offset, buf, len, 1);	/* final argument: write = 1 */
+}
+
+ssize_t ngnfs_data_read(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			u64 offset, void *buf, size_t len)
+{
+	return read_write_data(nfi, ig, offset, buf, len, 0);	/* final argument: write = 0 */
+}
diff --git a/shared/data.h b/shared/data.h
new file mode 100644
index 0000000..aadcc75
--- /dev/null
+++ b/shared/data.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef NGNFS_SHARED_DATA_H
+#define NGNFS_SHARED_DATA_H
+
+#include "shared/fs_info.h"
+#include "shared/inode.h"
+
+ssize_t ngnfs_data_read(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			u64 offset, void *buf, size_t len);
+ssize_t ngnfs_data_write(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+			 u64 offset, void *buf, size_t len);
+
+#endif
diff --git a/shared/format-block.h b/shared/format-block.h
index 12d8bfc..b6749d8 100644
--- a/shared/format-block.h
+++ b/shared/format-block.h
@@ -95,6 +95,26 @@ struct ngnfs_ino_gen {
 	__le64 gen;	/* inode generation, starts at 1 */
 };
 
+/*
+ * Data blocks are pointed to by a simple tree of indirect blocks rooted
+ * in a single field in the inode. The height is one greater than the
+ * level of the referenced block. It's 0 for an empty tree.
+ */
+struct ngnfs_data_root {
+	struct ngnfs_block_ref ref;	/* reference to the topmost block of the tree */
+	__u8 _pad[7];
+	__u8 height;			/* 0 = empty tree, else level of the root block + 1 */
+};
+
+/*
+ * Indirect blocks are a simple array of block refs that fills the whole block.
+ */
+#define NGNFS_DATA_REFS_PER_BLOCK (NGNFS_BLOCK_SIZE/sizeof(struct ngnfs_block_ref))
+
+struct ngnfs_indirect_block {
+	struct ngnfs_block_ref refs[NGNFS_DATA_REFS_PER_BLOCK];	/* a zero bnr marks a missing child */
+};
+
 /*
  * Inodes are stored in inode blocks.  Inode blocks numbers are directly
  * calculated from the inode number.  The block itself is formatted as a
@@ -118,6 +138,7 @@ struct ngnfs_inode {
 	__le64 crtime_nsec;
 	struct ngnfs_btree_root dirents;
 	struct ngnfs_btree_root xattrs;
+	struct ngnfs_data_root data;
 };
 
 #define NGNFS_ROOT_INO 1
diff --git a/shared/inode.c b/shared/inode.c
index b147980..5146e90 100644
--- a/shared/inode.c
+++ b/shared/inode.c
@@ -64,6 +64,7 @@ int ngnfs_inode_init(struct ngnfs_inode_txn_ref *itref, struct ngnfs_inode_ino_g
 	ngnfs_tblk_assign(tblk, ninode->crtime_nsec, ninode->atime_nsec);
 	ngnfs_tblk_memset(tblk, &ninode->dirents, 0, sizeof(ninode->dirents));
 	ngnfs_tblk_memset(tblk, &ninode->xattrs, 0, sizeof(ninode->xattrs));
+	ngnfs_tblk_memset(tblk, &ninode->data, 0, sizeof(ninode->data));
 
 	return 0;
 }
-- 
2.48.1




More information about the ngnfs-devel mailing list