[PATCH 19/20] Add file data and debugfs read/write commands
Valerie Aurora
val at versity.com
Thu Jun 12 13:11:11 PDT 2025
Implement ngnfs_data_read() and ngnfs_data_write() and corresponding
debugfs commands. Also copy over const_ilog2() and related functions
from the kernel.
Co-authored-by: Zach Brown <zab at zabbo.net>
Signed-off-by: Valerie Aurora <val at versity.com>
---
cli/debugfs.c | 174 +++++++++++++++++
shared/data.c | 445 ++++++++++++++++++++++++++++++++++++++++++
shared/data.h | 18 ++
shared/format-block.h | 35 ++++
shared/inode.c | 1 +
shared/lk/fls.h | 68 +++++++
shared/lk/log2.h | 258 ++++++++++++++++++++++++
7 files changed, 999 insertions(+)
create mode 100644 shared/data.c
create mode 100644 shared/data.h
create mode 100644 shared/lk/fls.h
create mode 100644 shared/lk/log2.h
diff --git a/cli/debugfs.c b/cli/debugfs.c
index 82b2724..6fb21a2 100644
--- a/cli/debugfs.c
+++ b/cli/debugfs.c
@@ -17,6 +17,7 @@
#include "shared/lk/types.h"
#include "shared/lk/xattr.h"
+#include "shared/data.h"
#include "shared/dir.h"
#include "shared/format-block.h"
#include "shared/inode.h"
@@ -357,6 +358,105 @@ static void cmd_quit(struct debugfs_context *ctx, int argc, char **argv)
return;
}
+static void cmd_read(struct debugfs_context *ctx, int argc, char **argv)
+{
+ struct ngnfs_dir_lookup_entry lent;
+ char *filename;
+ char *buf;
+ u64 offset, read_size;
+ u64 todo, done, zeroes, poisoned, garbage;
+ u64 i;
+ int buf_size = 4 * NGNFS_DATA_MAX_IO; /* allow multiple txn's per call */
+ int ret;
+
+ if (argc != 4) {
+ printf("usage: read <filename> <offset> <length>\n");
+ return;
+ }
+
+ filename = argv[1];
+
+ ret = strtoull_nerr(&offset, argv[2], NULL, 0);
+ if (ret < 0) {
+ print_err("parsing offset", ret);
+ return;
+ }
+
+ ret = parse_ull(&read_size, argv[3], 0, SIZE_MAX);
+ if (ret < 0)
+ return;
+
+ if (read_size < buf_size)
+ buf_size = read_size;
+
+ buf = malloc(buf_size);
+ if (!buf) {
+ printf("malloc error");
+ return;
+ }
+
+ ret = ngnfs_dir_lookup(ctx->nfi, &ctx->cwd_ig, filename, strlen(filename), &lent);
+ if (ret < 0) {
+ print_err("read", ret);
+ goto out;
+ }
+
+ zeroes = 0;
+ poisoned = 0;
+ garbage = 0;
+
+ for (done = 0; done < read_size; done += ret) {
+ todo = read_size - done;
+ if (todo > buf_size)
+ todo = buf_size;
+
+ memset(buf, 'x', todo); /* poison the buffer */
+
+ ret = ngnfs_data_read(ctx->nfi, &lent.ig, offset, buf, todo);
+ if (ret < 0) {
+ print_err("read", ret);
+ goto out;
+ } else if (ret == 0) {
+ printf("read: EOF\n");
+ break;
+ }
+
+ /* count what kind of bytes we read */
+ for (i = 0; i < ret; i++) {
+ switch (buf[i]) {
+ case '.':
+ break;
+ case '\0':
+ zeroes++;
+ break;
+ case 'x':
+ if (poisoned == 0)
+ printf("error: poisoned byte found at %llu\n", i + done);
+ poisoned++;
+ break;
+ default:
+ if (garbage == 0)
+ printf("error: garbage byte %c found at %llu\n",
+ buf[i], i + done);
+ garbage++;
+ break;
+ }
+ }
+ offset += ret;
+ }
+
+ if (done < read_size)
+ printf("short read: %llu bytes of %llu requested\n", done, read_size);
+
+ if (done > read_size)
+ printf("error: read too many bytes: %llu of %llu requested\n", done, read_size);
+
+ printf("read %llu bytes (%llu zero bytes %llu poisoned bytes %llu garbage bytes)\n",
+ done, zeroes, poisoned, garbage);
+out:
+ free(buf);
+}
+
static void cmd_readdir(struct debugfs_context *ctx, int argc, char **argv)
{
struct ngnfs_readdir_entry *buf;
@@ -604,6 +704,78 @@ static void cmd_unlink(struct debugfs_context *ctx, int argc, char **argv)
print_err("unlink", ret);
}
+static void cmd_write(struct debugfs_context *ctx, int argc, char **argv)
+{
+ struct ngnfs_dir_lookup_entry lent;
+ char *filename;
+ char *buf;
+ u64 offset;
+ u64 write_size;
+ u64 buf_size = 4 * NGNFS_DATA_MAX_IO; /* allow multiple txn's per call */
+ u64 todo, done;
+ int ret;
+
+ if (argc != 4) {
+ printf("usage: write <filename> <offset> <length>\n");
+ return;
+ }
+
+ filename = argv[1];
+
+ ret = strtoull_nerr(&offset, argv[2], NULL, 0);
+ if (ret < 0) {
+ print_err("parsing offset", ret);
+ return;
+ }
+
+ ret = parse_ull(&write_size, argv[3], 0, SIZE_MAX);
+ if (ret < 0)
+ return;
+
+ if (write_size < buf_size)
+ buf_size = write_size;
+
+ buf = malloc(buf_size);
+ if (!buf) {
+ printf("malloc error");
+ return;
+ }
+ memset(buf, '.', buf_size);
+
+ ret = ngnfs_dir_lookup(ctx->nfi, &ctx->cwd_ig, filename, strlen(filename), &lent);
+ if (ret < 0) {
+ print_err("write", ret);
+ goto out;
+ }
+
+ for (done = 0; done < write_size; done += ret) {
+ todo = write_size - done;
+ if (todo > buf_size)
+ todo = buf_size;
+
+ ret = ngnfs_data_write(ctx->nfi, &lent.ig, offset, buf, todo);
+
+ if (ret < 0) {
+ print_err("write", ret);
+ goto out;
+ }
+
+ if (ret == 0)
+ break;
+
+ offset += ret;
+ }
+
+ if (done < write_size)
+ printf("short write: %llu of %llu bytes requested\n", done, buf_size);
+ else if (done > write_size)
+ printf("error: wrote too many bytes: %llu of %llu requested\n", done, buf_size);
+ else
+ printf("wrote %llu bytes\n", done);
+out:
+ free(buf);
+}
+
static struct command {
char *name;
void (*func)(struct debugfs_context *ctx, int argc, char **argv);
@@ -618,6 +790,7 @@ static struct command {
{ "mkdir", cmd_mkdir, },
{ "mkfs", cmd_mkfs, },
{ "quit", cmd_quit, },
+ { "read", cmd_read, },
{ "readdir", cmd_readdir, },
{ "removexattr", cmd_removexattr, },
{ "rename", cmd_rename, },
@@ -626,6 +799,7 @@ static struct command {
{ "stat", cmd_stat, },
{ "sync", cmd_sync, },
{ "unlink", cmd_unlink, },
+ { "write", cmd_write, },
};
static int compar_cmd_names(const void *A, const void *B)
diff --git a/shared/data.c b/shared/data.c
new file mode 100644
index 0000000..145abe6
--- /dev/null
+++ b/shared/data.c
@@ -0,0 +1,445 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "shared/lk/bug.h"
+#include "shared/lk/byteorder.h"
+#include "shared/lk/math64.h"
+#include "shared/lk/types.h"
+
+#include "shared/block.h"
+#include "shared/data.h"
+#include "shared/format-block.h"
+#include "shared/inode.h"
+#include "shared/txn.h"
+
+/*
+ * File data is stored in simple tree of indirect blocks with data all
+ * at the same level of the tree. The topmost block in the tree and its
+ * level are stored in the inode. The tree is sparse and branches are
+ * grown as necessary to index newly written data blocks. While
+ * manipulating the tree, we use levels to identify the blocks at
+ * various levels of the tree, with the highest levels closer to the
+ * root.
+ *
+ * The data root field in the inode contains both the persistent
+ * reference (block number, etc.) to the root block of the tree, plus
+ * the height of the tree (the level of the block it points to, plus 1).
+ * The root block reference is not part of an indirect block.
+ *
+ * The level of a block is:
+ *
+ * 0 = data block
+ * 1 = references to data blocks
+ * 2 = references to single indirect blocks (pointing to data blocks)
+ * 3 = references to double indirect blocks
+ * 4 = references to triple indirect blocks
+ *
+ * Thus a data root reference with height 1 points to a block of level 0
+ * = a single block of data at logical file offset 0.
+ */
+
+/*
+ * Return the logical block number containing offset within a file.
+ */
+static u64 dblk_from_offset(u64 offset)
+{
+ return offset >> NGNFS_BLOCK_SHIFT;
+}
+
+/*
+ * Return the offset past the beginning of a block.
+ */
+static u64 offset_in_blk(u64 offset)
+{
+ return offset & NGNFS_BLOCK_MASK;
+}
+
+/*
+ * Calculate the index of the block reference for this logical block
+ * within an indirect block at this level (1 = pointers to data blocks).
+ */
+static u32 calc_ref_ind(u64 dblk, int level)
+{
+ u32 ind;
+ int i;
+
+ BUG_ON(level < 1);
+
+ for (i = 1; i < level; i++)
+ dblk >>= NGNFS_DATA_REFS_PER_BLK_SHIFT;
+
+ ind = dblk & (NGNFS_DATA_REFS_PER_BLK - 1ULL);
+
+ return ind;
+}
+
+/*
+ * Calculate the height of the tree (level of the root pointer) needed
+ * to index the logical block dblk in this file.
+ */
+static u8 height_from_dblk(u64 dblk)
+{
+ u8 height = 2;
+
+ if (dblk == 0)
+ return 1;
+
+ while (dblk >>= NGNFS_DATA_REFS_PER_BLK_SHIFT)
+ height++;
+
+ return height;
+}
+
+static int alloc_block(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+ struct ngnfs_txn_block *parent_tblk, struct ngnfs_block_ref *parent_ref,
+ struct ngnfs_txn_block **tblk_ret, void **data_ret)
+{
+ struct ngnfs_block_ref ref;
+ u64 bnr;
+ int ret;
+
+ ret = ngnfs_txn_alloc_meta(txn, &bnr);
+ if (ret < 0)
+ goto out;
+
+ ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_WRITE | NBF_NEW, tblk_ret, data_ret);
+ if (ret < 0)
+ goto out;
+
+ ref.bnr = cpu_to_le64(bnr);
+ ref.alloc_counter = 0; /* XXX */
+ ngnfs_tblk_assign(parent_tblk, *parent_ref, ref);
+out:
+ return ret;
+}
+
+static int grow_height(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+ struct ngnfs_inode_txn_ref *ino, int height)
+{
+ struct ngnfs_txn_block *parent_tblk;
+ struct ngnfs_block_ref *parent_ref;
+ struct ngnfs_indirect_block *iblk;
+ struct ngnfs_txn_block *tblk;
+ struct ngnfs_data_root *dr;
+ struct ngnfs_block_ref ref;
+ u8 level;
+ int ret;
+
+ dr = &ino->ninode->data;
+
+ ret = 0;
+ if ((dr->height > 0) &&
+ (height > dr->height)) {
+ /* start at root and fill in till we get to existing tree */
+ level = height;
+ ref = dr->ref; /* save current root of tree */
+ parent_tblk = ino->tblk;
+ parent_ref = &dr->ref;
+
+ while (level-- > dr->height) {
+ ret = alloc_block(nfi, txn, parent_tblk, parent_ref,
+ &tblk, (void **) &iblk);
+ if (ret < 0)
+ goto out;
+ /* growing height will always index old data to 0 */
+ parent_tblk = tblk;
+ parent_ref = &iblk->refs[0];
+ }
+
+ /* now put existing tree in indirect block */
+ ngnfs_tblk_assign(tblk, iblk->refs[0], ref);
+ ngnfs_tblk_assign(ino->tblk, dr->height, height);
+ }
+out:
+ return ret;
+}
+
+/*
+ * Allocate the indirect blocks for file data offset if they aren't
+ * already, and return the parent block of the data at offset. This is
+ * only called after checking that a read is from a valid range of the
+ * file. If a read hits an unallocated range, this returns 0 for the
+ * number of refs and the caller will zero fill the buffer.
+ */
+static int get_data_block_parent(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+ struct ngnfs_inode_txn_ref *ino, u64 offset, int write,
+ struct ngnfs_txn_block **tblkp,
+ struct ngnfs_block_ref **refs, int *nr)
+{
+ struct ngnfs_txn_block *parent_tblk;
+ struct ngnfs_block_ref *parent_ref;
+ struct ngnfs_indirect_block *iblk;
+ struct ngnfs_txn_block *tblk;
+ struct ngnfs_data_root *dr;
+ u64 dblk;
+ u64 bnr;
+ u8 height;
+ u8 level;
+ u8 ind;
+ int ret;
+
+ dr = &ino->ninode->data;
+ dblk = dblk_from_offset(offset);
+ height = height_from_dblk(dblk);
+
+ /* special case: dblk 0 and tree with 0 or 1 blocks in it */
+ if ((height == 1) &&
+ (dr->height <= 1)) {
+ if (dr->height == 0) {
+ ret = alloc_block(nfi, txn, ino->tblk, &dr->ref, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ngnfs_tblk_assign(ino->tblk, dr->height, height);
+ }
+
+ *tblkp = ino->tblk;
+ *refs = &dr->ref;
+ *nr = 1;
+ ret = 0;
+ goto out;
+ }
+
+ /* grow the height of any existing data */
+ ret = grow_height(nfi, txn, ino, height);
+ if (ret < 0)
+ goto out;
+
+ /* two cases: totally empty tree and tree of necessary height */
+ level = dr->height ? dr->height : height;
+ bnr = le64_to_cpu(dr->ref.bnr);
+ parent_tblk = ino->tblk;
+ parent_ref = &dr->ref;
+
+ /* may still have totally empty tree */
+ while (level-- > 1) {
+ if (bnr) {
+ ret = ngnfs_txn_get_block(nfi, txn, bnr, NBF_READ, &tblk, (void **) &iblk);
+ if (ret < 0)
+ goto out;
+
+ } else {
+ if (!write) {
+ *tblkp = NULL;
+ *refs = NULL;
+ *nr = NGNFS_DATA_REFS_PER_BLK - calc_ref_ind(dblk, 1);
+ }
+
+ /* get write access to update parent with new block */
+ if (parent_ref) {
+ ret = ngnfs_txn_get_block(nfi, txn, le64_to_cpu(parent_ref->bnr),
+ NBF_WRITE, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = alloc_block(nfi, txn, parent_tblk, parent_ref,
+ &tblk, (void **) &iblk);
+ if (ret < 0)
+ goto out;
+ }
+
+ if (level == 0)
+ break;
+
+ /* look up next block reference */
+ ind = calc_ref_ind(dblk, level);
+ bnr = le64_to_cpu(iblk->refs[ind].bnr);
+
+ parent_tblk = tblk;
+ parent_ref = &iblk->refs[ind];
+ };
+
+ if (dr->height < height)
+ ngnfs_tblk_assign(ino->tblk, dr->height, height);
+
+ *tblkp = tblk;
+ ind = calc_ref_ind(dblk, 1);
+ *refs = &iblk->refs[ind];
+ *nr = NGNFS_DATA_REFS_PER_BLK - ind;
+out:
+ return ret;
+}
+
+static int read_write_block(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+ struct ngnfs_txn_block *parent_tblk,
+ struct ngnfs_block_ref *parent_refs, int ind,
+ int start, void *buf, int len, int write)
+{
+ struct ngnfs_txn_block *tblk;
+ char *data;
+ u64 bnr;
+ nbf_t nbf;
+ int ret;
+
+ BUG_ON(start + len > NGNFS_BLOCK_SIZE);
+
+ if (parent_refs)
+ bnr = le64_to_cpu(parent_refs[ind].bnr);
+ else
+ bnr = 0;
+
+ /* zero fill sparse blocks */
+ if (!write && (bnr == 0)) {
+ memset(buf, 0, len);
+ ret = 0;
+ goto out;
+ }
+
+ if (bnr) {
+ nbf = write ? NBF_WRITE : NBF_READ;
+ ret = ngnfs_txn_get_block(nfi, txn, bnr, nbf, &tblk, (void **) &data);
+ if (ret < 0)
+ goto out;
+
+ } else {
+ ret = alloc_block(nfi, txn, parent_tblk, &parent_refs[ind], &tblk, (void **) &data);
+ if (ret < 0)
+ goto out;
+ }
+
+ if (write)
+ ngnfs_tblk_memcpy(tblk, data + start, buf, len);
+ else
+ memcpy(buf, data + start, len);
+out:
+ return ret;
+}
+
+/*
+ * After reading the inode, trim the requested file data range for a
+ * read request to only cover reads less than the size of the file. If
+ * the range doesn't overlap any allocated part of the file, set len to
+ * 0 and the caller will return no error.
+ */
+static void trim_read_range(struct ngnfs_inode *ino, u64 offset, size_t *len)
+{
+ u64 i_size;
+
+ i_size = le64_to_cpu(ino->size);
+
+ if (offset >= i_size)
+ *len = 0;
+ else if ((offset + *len) >= i_size)
+ *len = i_size - offset;
+}
+
+/*
+ * If file size needs to be updated, then we already have write access
+ * to the inode, so this currently can't fail.
+ */
+static void update_isize(struct ngnfs_inode_txn_ref *itref, u64 size)
+{
+ if (size > le64_to_cpu(itref->ninode->size))
+ ngnfs_tblk_assign(itref->tblk, itref->ninode->size, cpu_to_le64(size));
+}
+
+static int read_write_range(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+ struct ngnfs_inode_txn_ref *itref, u64 offset, char *buf, size_t len,
+ int write, size_t *bytes)
+{
+ struct ngnfs_txn_block *tblk;
+ struct ngnfs_block_ref *refs;
+ size_t start, done, todo;
+ int nr;
+ int i;
+ int ret;
+
+ if (!write)
+ trim_read_range(itref->ninode, offset, &len);
+
+ done = 0;
+ start = offset_in_blk(offset);
+
+ while (done < len) {
+ ret = get_data_block_parent(nfi, txn, itref, offset, write, &tblk, &refs, &nr);
+ if (ret < 0)
+ goto out;
+
+ for (i = 0; i < nr; i++) {
+ todo = len - done;
+ if (todo > (NGNFS_BLOCK_SIZE - start))
+ todo = NGNFS_BLOCK_SIZE - start;
+
+ ret = read_write_block(nfi, txn, tblk, refs, i, start,
+ buf + done, todo, write);
+ if (ret < 0)
+ goto out;
+
+ offset += todo;
+ done += todo;
+ start = 0;
+
+ if (done >= len)
+ break;
+ }
+ }
+
+ *bytes = done;
+ ret = 0;
+out:
+ if (done) {
+ if (write)
+ update_isize(itref, offset);
+ ret = 0; /* any error will be returned on next call */
+ }
+
+ return ret;
+}
+
+static ssize_t read_write_data(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+ u64 offset, void *buf, size_t len, int write)
+{
+ struct ngnfs_transaction txn;
+ struct ngnfs_inode_txn_ref itref;
+ size_t start, bytes, done, todo;
+ nbf_t nbf;
+ int ret;
+
+ bytes = 0;
+ start = offset_in_blk(offset);
+ nbf = write ? NBF_WRITE : NBF_READ;
+
+ ngnfs_txn_init(&txn);
+
+ /* split into appropriate-sized transactions */
+ for (done = 0; done < len; done += bytes) {
+
+ do {
+ todo = len - done;
+ if (todo > (NGNFS_DATA_MAX_IO - start))
+ todo = NGNFS_DATA_MAX_IO - start;
+
+ /* XXX need to prevent other IOs interleaving */
+ ret = ngnfs_inode_get(nfi, &txn, nbf, ig, &itref) ?:
+ read_write_range(nfi, &txn, &itref, offset, buf + done,
+ todo, write, &bytes);
+
+ } while (ngnfs_txn_retry(nfi, &txn, &ret));
+
+ ngnfs_txn_teardown(nfi, &txn);
+
+ if (ret < 0)
+ goto out;
+
+ if (bytes == 0)
+ break;
+
+ offset += bytes;
+ start = 0;
+ }
+
+ ret = done;
+out:
+ return ret ?: done;
+}
+
+ssize_t ngnfs_data_write(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+ u64 offset, void *buf, size_t len)
+{
+ return read_write_data(nfi, ig, offset, buf, len, 1);
+}
+
+ssize_t ngnfs_data_read(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+ u64 offset, void *buf, size_t len)
+{
+ return read_write_data(nfi, ig, offset, buf, len, 0);
+}
diff --git a/shared/data.h b/shared/data.h
new file mode 100644
index 0000000..e5bcccf
--- /dev/null
+++ b/shared/data.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef NGNFS_SHARED_DATA_H
+#define NGNFS_SHARED_DATA_H
+
+#include "shared/fs_info.h"
+#include "shared/inode.h"
+
+/*
+ * The maximum amount of data we want to read/write in one transaction.
+ */
+#define NGNFS_DATA_MAX_IO (16 * NGNFS_BLOCK_SIZE)
+
+ssize_t ngnfs_data_read(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+ u64 offset, void *buf, size_t len);
+ssize_t ngnfs_data_write(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig,
+ u64 offset, void *buf, size_t len);
+
+#endif
diff --git a/shared/format-block.h b/shared/format-block.h
index 5344ffa..b9a65c3 100644
--- a/shared/format-block.h
+++ b/shared/format-block.h
@@ -2,12 +2,15 @@
#ifndef NGNFS_SHARED_FORMAT_BLOCK_H
#define NGNFS_SHARED_FORMAT_BLOCK_H
+#include "shared/lk/build_bug.h"
#include "shared/lk/compiler_attributes.h"
#include "shared/lk/limits.h"
+#include "shared/lk/log2.h"
#include "shared/lk/types.h"
#define NGNFS_BLOCK_SHIFT 12
#define NGNFS_BLOCK_SIZE (1 << NGNFS_BLOCK_SHIFT)
+#define NGNFS_BLOCK_MASK (NGNFS_BLOCK_SIZE - 1ULL)
struct ngnfs_block_ref {
__le64 bnr;
@@ -95,6 +98,37 @@ struct ngnfs_ino_gen {
__le64 gen; /* inode generation, starts at 1 */
};
+/*
+ * Data blocks are pointed to by a simple tree of indirect blocks rooted
+ * in a single field in the inode. The height is one greater than the
+ * level of the referenced block. It's 0 for an empty tree.
+ */
+struct ngnfs_data_root {
+ struct ngnfs_block_ref ref;
+ __u8 height;
+ __u8 _pad[7];
+};
+
+/*
+ * Indirect blocks are a simple array of block refs. We rely on the
+ * number of references per indirect block being a power of 2, so check
+ * that at compile time.
+ */
+#define NGNFS_DATA_REFS_PER_BLK (NGNFS_BLOCK_SIZE / sizeof(struct ngnfs_block_ref))
+BUILD_BUG_ON(NGNFS_DATA_REFS_PER_BLK & (NGNFS_DATA_REFS_PER_BLK - 1));
+
+/*
+ * Because blocks per ref is based on the size of a struct, we can't do
+ * the smart thing and define the shift first and then the value, we
+ * have to go backwards and define the shift from the value instead.
+ */
+
+#define NGNFS_DATA_REFS_PER_BLK_SHIFT const_ilog2(NGNFS_DATA_REFS_PER_BLK)
+
+struct ngnfs_indirect_block {
+ struct ngnfs_block_ref refs[NGNFS_DATA_REFS_PER_BLK];
+};
+
/*
* Inodes are stored in inode blocks. Inode blocks numbers are directly
* calculated from the inode number. The block itself is formatted as a
@@ -121,6 +155,7 @@ struct ngnfs_inode {
__le64 crtime_nsec;
struct ngnfs_btree_root dirents;
struct ngnfs_btree_root xattrs;
+ struct ngnfs_data_root data;
};
#define NGNFS_ROOT_INO 1
diff --git a/shared/inode.c b/shared/inode.c
index 0e4edd2..9f58f8f 100644
--- a/shared/inode.c
+++ b/shared/inode.c
@@ -60,6 +60,7 @@ int ngnfs_inode_init(struct ngnfs_inode_txn_ref *itref, struct ngnfs_inode_ino_g
ngnfs_tblk_assign(tblk, ninode->crtime_nsec, ninode->atime_nsec);
ngnfs_tblk_memset(tblk, &ninode->dirents, 0, sizeof(ninode->dirents));
ngnfs_tblk_memset(tblk, &ninode->xattrs, 0, sizeof(ninode->xattrs));
+ ngnfs_tblk_memset(tblk, &ninode->data, 0, sizeof(ninode->data));
return 0;
}
diff --git a/shared/lk/fls.h b/shared/lk/fls.h
new file mode 100644
index 0000000..4b49f67
--- /dev/null
+++ b/shared/lk/fls.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SHARED_LK_FLS_H
+#define _SHARED_LK_FLS_H
+
+#include "shared/lk/bits.h"
+#include "shared/lk/bitops.h"
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned int __fls(unsigned long word)
+{
+ return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __always_inline int fls(unsigned int x)
+{
+ return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#if BITS_PER_LONG == 32
+static __always_inline int fls64(__u64 x)
+{
+ __u32 h = x >> 32;
+ if (h)
+ return fls(h) + 32;
+ return fls(x);
+}
+#elif BITS_PER_LONG == 64
+static __always_inline int fls64(__u64 x)
+{
+ if (x == 0)
+ return 0;
+ return __fls(x) + 1;
+}
+#else
+#error BITS_PER_LONG not 32 or 64
+#endif
+
+static inline unsigned int fls_long(unsigned long l)
+{
+ if (sizeof(l) == 4)
+ return fls(l);
+ return fls64(l);
+}
+
+#endif
diff --git a/shared/lk/log2.h b/shared/lk/log2.h
new file mode 100644
index 0000000..2aa7297
--- /dev/null
+++ b/shared/lk/log2.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells at redhat.com)
+ */
+
+#ifndef _SHARED_LK_LOG2_H
+#define _SHARED_LK_LOG2_H
+
+#include "shared/lk/fls.h"
+#include "shared/lk/types.h"
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ * more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+#ifndef CONFIG_ARCH_HAS_ILOG2_U32
+static __always_inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+ return fls(n) - 1;
+}
+#endif
+
+#ifndef CONFIG_ARCH_HAS_ILOG2_U64
+static __always_inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+ return fls64(n) - 1;
+}
+#endif
+
+/**
+ * is_power_of_2() - check if a value is a power of two
+ * @n: the value to check
+ *
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ * Return: true if @n is a power of 2, otherwise false.
+ */
+static __always_inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+ return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+/**
+ * __roundup_pow_of_two() - round up to nearest power of two
+ * @n: value to round up
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+ return 1UL << fls_long(n - 1);
+}
+
+/**
+ * __rounddown_pow_of_two() - round down to nearest power of two
+ * @n: value to round down
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+ return 1UL << (fls_long(n) - 1);
+}
+
+/**
+ * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
+ * @n: parameter
+ *
+ * Use this where sparse expects a true constant expression, e.g. for array
+ * indices.
+ */
+#define const_ilog2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n) < 2 ? 0 : \
+ (n) & (1ULL << 63) ? 63 : \
+ (n) & (1ULL << 62) ? 62 : \
+ (n) & (1ULL << 61) ? 61 : \
+ (n) & (1ULL << 60) ? 60 : \
+ (n) & (1ULL << 59) ? 59 : \
+ (n) & (1ULL << 58) ? 58 : \
+ (n) & (1ULL << 57) ? 57 : \
+ (n) & (1ULL << 56) ? 56 : \
+ (n) & (1ULL << 55) ? 55 : \
+ (n) & (1ULL << 54) ? 54 : \
+ (n) & (1ULL << 53) ? 53 : \
+ (n) & (1ULL << 52) ? 52 : \
+ (n) & (1ULL << 51) ? 51 : \
+ (n) & (1ULL << 50) ? 50 : \
+ (n) & (1ULL << 49) ? 49 : \
+ (n) & (1ULL << 48) ? 48 : \
+ (n) & (1ULL << 47) ? 47 : \
+ (n) & (1ULL << 46) ? 46 : \
+ (n) & (1ULL << 45) ? 45 : \
+ (n) & (1ULL << 44) ? 44 : \
+ (n) & (1ULL << 43) ? 43 : \
+ (n) & (1ULL << 42) ? 42 : \
+ (n) & (1ULL << 41) ? 41 : \
+ (n) & (1ULL << 40) ? 40 : \
+ (n) & (1ULL << 39) ? 39 : \
+ (n) & (1ULL << 38) ? 38 : \
+ (n) & (1ULL << 37) ? 37 : \
+ (n) & (1ULL << 36) ? 36 : \
+ (n) & (1ULL << 35) ? 35 : \
+ (n) & (1ULL << 34) ? 34 : \
+ (n) & (1ULL << 33) ? 33 : \
+ (n) & (1ULL << 32) ? 32 : \
+ (n) & (1ULL << 31) ? 31 : \
+ (n) & (1ULL << 30) ? 30 : \
+ (n) & (1ULL << 29) ? 29 : \
+ (n) & (1ULL << 28) ? 28 : \
+ (n) & (1ULL << 27) ? 27 : \
+ (n) & (1ULL << 26) ? 26 : \
+ (n) & (1ULL << 25) ? 25 : \
+ (n) & (1ULL << 24) ? 24 : \
+ (n) & (1ULL << 23) ? 23 : \
+ (n) & (1ULL << 22) ? 22 : \
+ (n) & (1ULL << 21) ? 21 : \
+ (n) & (1ULL << 20) ? 20 : \
+ (n) & (1ULL << 19) ? 19 : \
+ (n) & (1ULL << 18) ? 18 : \
+ (n) & (1ULL << 17) ? 17 : \
+ (n) & (1ULL << 16) ? 16 : \
+ (n) & (1ULL << 15) ? 15 : \
+ (n) & (1ULL << 14) ? 14 : \
+ (n) & (1ULL << 13) ? 13 : \
+ (n) & (1ULL << 12) ? 12 : \
+ (n) & (1ULL << 11) ? 11 : \
+ (n) & (1ULL << 10) ? 10 : \
+ (n) & (1ULL << 9) ? 9 : \
+ (n) & (1ULL << 8) ? 8 : \
+ (n) & (1ULL << 7) ? 7 : \
+ (n) & (1ULL << 6) ? 6 : \
+ (n) & (1ULL << 5) ? 5 : \
+ (n) & (1ULL << 4) ? 4 : \
+ (n) & (1ULL << 3) ? 3 : \
+ (n) & (1ULL << 2) ? 2 : \
+ 1) : \
+ -1)
+
+/**
+ * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
+ * @n: parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+ __builtin_constant_p(n) ? \
+ ((n) < 2 ? 0 : \
+ 63 - __builtin_clzll(n)) : \
+ (sizeof(n) <= 4) ? \
+ __ilog2_u32(n) : \
+ __ilog2_u64(n) \
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n: parameter
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ ((n) == 1) ? 1 : \
+ (1UL << (ilog2((n) - 1) + 1)) \
+ ) : \
+ __roundup_pow_of_two(n) \
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n: parameter
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (1UL << ilog2(n))) : \
+ __rounddown_pow_of_two(n) \
+ )
+
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+ return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
+
+/**
+ * order_base_2 - calculate the (rounded up) base 2 order of the argument
+ * @n: parameter
+ *
+ * The first few values calculated by this routine:
+ * ob2(0) = 0
+ * ob2(1) = 0
+ * ob2(2) = 1
+ * ob2(3) = 2
+ * ob2(4) = 2
+ * ob2(5) = 3
+ * ... and so on.
+ */
+#define order_base_2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ ((n) == 0 || (n) == 1) ? 0 : \
+ ilog2((n) - 1) + 1) : \
+ __order_base_2(n) \
+)
+
+static inline __attribute__((const))
+int __bits_per(unsigned long n)
+{
+ if (n < 2)
+ return 1;
+ if (is_power_of_2(n))
+ return order_base_2(n) + 1;
+ return order_base_2(n);
+}
+
+/**
+ * bits_per - calculate the number of bits required for the argument
+ * @n: parameter
+ *
+ * This is constant-capable and can be used for compile time
+ * initializations, e.g bitfields.
+ *
+ * The first few values calculated by this routine:
+ * bf(0) = 1
+ * bf(1) = 1
+ * bf(2) = 2
+ * bf(3) = 2
+ * bf(4) = 3
+ * ... and so on.
+ */
+#define bits_per(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ ((n) == 0 || (n) == 1) \
+ ? 1 : ilog2(n) + 1 \
+ ) : \
+ __bits_per(n) \
+)
+#endif /* _LINUX_LOG2_H */
--
2.49.0
More information about the ngnfs-devel
mailing list