[PATCH 06/20] Add auto-generated "." and ".." entries to directories

Valerie Aurora val at versity.com
Thu Jun 12 13:10:58 PDT 2025


There are pros and cons to real or fake "." and ".." dentries, but we
decided to go with fake because we don't have to do a potentially slow
lookup of "." or ".." in large directories. Userspace likes "." and
".." to come first in readdir(), and we return entries in hash value
order, so reserve 0 and 1 in the hash output space for "." and "..".
Then autogenerate them in readdir() and lookup() using the parent
inode back reference. Also add an optional position argument to the
debugfs readdir() command to test correctness of handling readdir()
position.

Co-authored-by: Zach Brown <zab at zabbo.net>
Signed-off-by: Valerie Aurora <val at versity.com>
---
 cli/debugfs.c         | 16 ++++++++-
 shared/dir.c          | 75 ++++++++++++++++++++++++++++++++++++++++---
 shared/format-block.h |  5 +++
 3 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/cli/debugfs.c b/cli/debugfs.c
index acb2a73..08dfdce 100644
--- a/cli/debugfs.c
+++ b/cli/debugfs.c
@@ -20,6 +20,7 @@
 #include "shared/log.h"
 #include "shared/mkfs.h"
 #include "shared/mount.h"
+#include "shared/nerr.h"
 #include "shared/thread.h"
 
 #include "cli/cli.h"
@@ -79,13 +80,26 @@ static void cmd_readdir(struct debugfs_context *ctx, int argc, char **argv)
 	int ret;
 	int i;
 
+	if (argc > 2) {
+		printf("usage: readdir [position]\n");
+		return;
+	}
+
+	pos = 0;
+	if (argc == 2) {
+		ret = strtoull_nerr(&pos, argv[1], NULL, 0);
+		if (ret < 0) {
+			printf("invalid position: %s\n", argv[1]);
+			return;
+		}
+	}
+
 	buf = malloc(size);
 	if (!buf) {
 		printf("malloc error");
 		return;
 	}
 
-	pos = 0;
 	while (true) {
 		ret = ngnfs_dir_readdir(ctx->nfi, &ctx->cwd_ig, pos, buf, size);
 		if (ret <= 0) {
diff --git a/shared/dir.c b/shared/dir.c
index 570fbaf..3828d1b 100644
--- a/shared/dir.c
+++ b/shared/dir.c
@@ -42,9 +42,37 @@ static bool names_equal(u8 *a, size_t a_len, u8 *b, size_t b_len)
 	return a_len == b_len && memcmp(a, b, a_len) == 0;
 }
 
+/*
+ * The directory entries for . and .. are generated during lookup and
+ * readdir and are not "real" directory entries stored as dirents. For
+ * readdir to work properly, we need the position of each entry (its
+ * hash value) to be stable. We also want to generate . and .. first
+ * because it's easier than inserting them somewhere in the middle and
+ * because applications like it that way.
+ *
+ * The solution is to reserve the hash values 0 for . and 1 for .. so
+ * that we can return them first in readdir() and the positions returned
+ * by readdir are strictly ascending.
+ */
 static u64 name_hash(void *name, size_t name_len)
 {
-	return xxh64(name, name_len, NGNFS_DIRENT_HASH_SEED) & NGNFS_DIRENT_HASH_MASK;
+	char *s = name;
+	u64 hash;
+
+	if ((name_len < 3) && (name_len > 0) && s[0] == '.') {	/* only 1- or 2-byte names starting with '.' can be "." or ".." */
+		if (name_len == 1)
+			return NGNFS_DIRENT_DOT_HASH;	/* name is exactly "." */
+
+		if (s[1] == '.')
+			return NGNFS_DIRENT_DOT_DOT_HASH;	/* name is exactly ".." */
+	}
+
+	hash = xxh64(name, name_len, NGNFS_DIRENT_HASH_SEED) & NGNFS_DIRENT_HASH_MASK;	/* every other name, including 2-byte names like ".a" */
+
+	if (hash < NGNFS_DIRENT_MIN_HASH)	/* computed hash landed on a value reserved for "." or ".." */
+		hash = NGNFS_DIRENT_MIN_HASH;	/* clamp up so only "." and ".." ever map below the minimum */
+
+	return hash;
 }
 
 /*
@@ -284,7 +312,7 @@ struct readdir_args {
 	int nr;
 };
 
-static int fill_dirent_rd(struct ngnfs_btree_key *key, void *val, size_t val_size, void *args)
+static int fill_readdir_rd(struct ngnfs_btree_key *key, void *val, size_t val_size, void *args)
 {
 	struct readdir_args *ra = args;
 	struct ngnfs_dirent *dent = val;
@@ -317,6 +345,45 @@ static int fill_dirent_rd(struct ngnfs_btree_key *key, void *val, size_t val_siz
 	return NGNFS_BTREE_ITER_CONTINUE;
 }
 
+static int dots_and_dents_read_iter(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+				    struct ngnfs_btree_root *root, struct ngnfs_inode_txn_ref *dir,
+				    struct ngnfs_btree_key *key, struct ngnfs_btree_key *next,
+				    struct ngnfs_btree_key *last,  ngnfs_btree_read_iter_fn_t iter,
+				    void *iter_arg)
+{
+	struct ngnfs_dirent dots;
+	struct ngnfs_btree_key tmp_key = *key;	/* local copy; the caller's key is never written here */
+	u64 pos = le64_to_cpu(tmp_key.k[0]);	/* starting position comes from the first key word */
+	int ret;
+
+	while (pos <= NGNFS_DIRENT_DOT_DOT_HASH) {	/* positions in the reserved range get a generated "." or ".." */
+		if (last && pos > le64_to_cpu(last->k[0]))	/* last is optional; stop generating once pos is past it */
+			break;
+
+		dots = (struct ngnfs_dirent) {
+			.pers_dtype = NGNFS_DT_DIR,
+			.name_len = pos == NGNFS_DIRENT_DOT_HASH ? 1 : 2,	/* "." is one byte, ".." is two */
+			.ig = pos == NGNFS_DIRENT_DOT_HASH ?
+			dir->ninode->ig : dir->ninode->parent_ig,	/* "." names the directory itself, ".." its parent */
+			.name[0] = '.',
+			.name[1] = '.',	/* both bytes are set; name_len decides how many are passed on */
+		};
+
+		ret = iter(&tmp_key, &dots, offsetof(struct ngnfs_dirent, name[dots.name_len]),	/* value size ends after name_len name bytes */
+			   iter_arg);
+
+		if (ret != NGNFS_BTREE_ITER_CONTINUE)	/* any other result is returned as-is, skipping the btree read */
+			goto out;
+
+		pos++;
+		tmp_key.k[0] = cpu_to_le64(pos);	/* keep the key in step with pos for the next callback or the btree read */
+	}
+
+	ret = ngnfs_btree_read_iter(nfi, txn, root, &tmp_key, next, last, iter, iter_arg);	/* continue with stored dirents from the possibly advanced key */
+out:
+	return ret;
+}
+
 /*
  * Read directory entries starting at the given position, filling
  * entries in the buffer until entries are exhausted or the buffer is
@@ -359,8 +426,8 @@ int ngnfs_dir_readdir(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *dir
 
 		ret = ngnfs_inode_get(nfi, &txn, NBF_READ, dir_ig, &dir)			?:
 		      check_ifmt(dir.ninode, S_IFDIR, -ENOTDIR)					?:
-		      ngnfs_btree_read_iter(nfi, &txn, &dir.ninode->dirents, &key,
-					    NULL, NULL, fill_dirent_rd, &ra);
+		      dots_and_dents_read_iter(nfi, &txn, &dir.ninode->dirents, &dir, &key,
+					       NULL, NULL, fill_readdir_rd, &ra);
 
 	} while (ngnfs_txn_retry(nfi, &txn, &ret));
 
diff --git a/shared/format-block.h b/shared/format-block.h
index 9c5221e..d3cc302 100644
--- a/shared/format-block.h
+++ b/shared/format-block.h
@@ -167,4 +167,9 @@ struct ngnfs_dirent {
  */
 #define NGNFS_DIRENT_HASH_MASK	(U64_MAX ^ (1ULL << 63) ^ NGNFS_DIRENT_COLL_BIT)
 
+/* reserved hash values for . and .. */
+#define NGNFS_DIRENT_DOT_HASH	 	0ULL	/* returned by name_hash() for "." */
+#define NGNFS_DIRENT_DOT_DOT_HASH	1ULL	/* returned by name_hash() for ".." */
+#define NGNFS_DIRENT_MIN_HASH		2ULL	/* smallest value name_hash() returns for any other name */
+
 #endif
-- 
2.49.0




More information about the ngnfs-devel mailing list