[PATCH 07/14] Add auto-generated "." and ".." entries to directories
Valerie Aurora
val at versity.com
Thu Feb 27 06:16:16 PST 2025
There are pros and cons to real or fake "." and ".." dentries, but we
decided to go with fake because we don't have to do a potentially slow
lookup of ".." in large directories. Userspace likes "." and ".." to
come first in readdir(), and we return entries in hash value order, so
reserve 0 and 1 in the hash output space for "." and "..". Then
autogenerate them in readdir() and lookup() using the parent inode
back reference.
Signed-off-by: Valerie Aurora <val at versity.com>
---
shared/dir.c | 115 ++++++++++++++++++++++++++++++++++++------
shared/format-block.h | 5 ++
2 files changed, 104 insertions(+), 16 deletions(-)
diff --git a/shared/dir.c b/shared/dir.c
index d969e10..0439816 100644
--- a/shared/dir.c
+++ b/shared/dir.c
@@ -42,9 +42,37 @@ static bool names_equal(u8 *a, size_t a_len, u8 *b, size_t b_len)
return a_len == b_len && memcmp(a, b, a_len) == 0;
}
-static u64 name_hash(void *name, size_t name_len)
-{
- return xxh64(name, name_len, NGNFS_DIRENT_HASH_SEED) & NGNFS_DIRENT_HASH_MASK;
+/*
+ * The directory entries for . and .. are generated during lookup and
+ * readdir and are not "real" directory entries stored as dirents. For
+ * readdir to work properly, we need the position of each entry (its
+ * hash value) to be stable. We also want to generate . and .. first
+ * because it's easier than inserting them somewhere in the middle and
+ * because applications like it that way.
+ *
+ * The solution is to reserve the hash values 0 for . and 1 for .. so
+ * that we can return them first in readdir() and the positions returned
+ * by readdir are strictly ascending.
+ *
+ * Every other name is hashed with xxh64(), masked with
+ * NGNFS_DIRENT_HASH_MASK, and then clamped up to NGNFS_DIRENT_MIN_HASH
+ * so that it can never land on one of the reserved values.
+ *
+ * name_len is checked to be non-zero before name[0] is examined, so a
+ * zero-length name never causes a read past the end of the buffer; it
+ * is simply hashed like any other name.
+ */
+
+static u64 name_hash(void *name, size_t name_len) {
+ char *s = name;
+ u64 hash;
+
+ if (name_len >= 1 && name_len <= 2 && s[0] == '.') {
+ if (name_len == 1)
+ return NGNFS_DIRENT_DOT_HASH;
+
+ if (s[1] == '.')
+ return NGNFS_DIRENT_DOT_DOT_HASH;
+ }
+
+ hash = xxh64(name, name_len, NGNFS_DIRENT_HASH_SEED) & NGNFS_DIRENT_HASH_MASK;
+
+ if (hash < NGNFS_DIRENT_MIN_HASH)
+ hash = NGNFS_DIRENT_MIN_HASH;
+
+ return hash;
}
/*
@@ -105,6 +133,16 @@ unsigned int ngnfs_type_to_dtype(enum ngnfs_dentry_type type)
return DT_UNKNOWN;
}
+static void fill_dent(__le64 ino, __le64 version, enum ngnfs_dentry_type type, char *name,
+ size_t name_len, struct ngnfs_dirent *dent)
+{ /*
+ * Populate *dent from the given fields. ino and version arrive as
+ * __le64 and are stored as passed, with no byte-order conversion
+ * here. Exactly name_len bytes of name are copied into dent->name;
+ * no terminator is written and the capacity of dent->name is not
+ * checked here, so the caller must supply a dent with enough room.
+ */
+ dent->ino = ino;
+ dent->type = type;
+ dent->version = version;
+ dent->name_len = name_len;
+ memcpy(dent->name, name, name_len);
+}
+
static void init_dirent_args(struct dirent_args *da, char *name, size_t name_len, u64 ino,
mode_t mode)
{
@@ -112,18 +150,13 @@ static void init_dirent_args(struct dirent_args *da, char *name, size_t name_len
da->dent_size = offsetof(struct ngnfs_dirent, name) + name_len;
da->dtype = IFTODT(mode);
- da->dent.ino = cpu_to_le64(ino);
- da->dent.version = cpu_to_le64(0); /* XXX :/ */
- da->dent.type = mode_to_type(mode);
- da->dent.name_len = name_len;
+ fill_dent(cpu_to_le64(ino), 0 /* XXX :/ */, mode_to_type(mode), name, name_len, &da->dent);
/* ensure that we're stitching together a contiguous max name buffer */
BUILD_BUG_ON(offsetofend(struct dirent_args, dent.name) !=
offsetof(struct dirent_args, __max_name_storage));
BUILD_BUG_ON((sizeof_field(struct dirent_args, dent.name) +
sizeof_field(struct dirent_args, __max_name_storage)) != NGNFS_NAME_MAX);
-
- memcpy(da->dent.name, name, name_len);
}
static int update_dirent_args_ino(struct dirent_args *da, u64 ino)
@@ -279,16 +312,15 @@ struct readdir_args {
int nr;
};
-static int fill_dirent_rd(struct ngnfs_btree_key *key, void *val, size_t val_size, void *args)
+static int fill_readdir(struct ngnfs_btree_key *key, struct ngnfs_dirent *dent,
+ struct readdir_args *ra)
{
- struct readdir_args *ra = args;
- struct ngnfs_dirent *dent = val;
size_t aligned;
size_t bytes;
bytes = offsetof(struct ngnfs_readdir_entry, name[dent->name_len + 1]);
if (bytes > ra->size)
- return 0;
+ return -ENOBUFS;
aligned = ALIGN(bytes, __alignof__(struct ngnfs_readdir_entry));
@@ -304,12 +336,62 @@ static int fill_dirent_rd(struct ngnfs_btree_key *key, void *val, size_t val_siz
ra->nr++;
if (ra->nr == INT_MAX || aligned >= ra->size)
- return 0;
+ return -ENOBUFS;
ra->ent = (void *)ra->ent + aligned;
ra->size -= aligned;
- return NGNFS_BTREE_ITER_CONTINUE;
+ return 0;
+}
+
+static int fill_readdir_rd(struct ngnfs_btree_key *key, void *val, size_t val_size, void *args)
+{ /*
+ * Callback handed to ngnfs_btree_read_iter() by ngnfs_dir_readdir().
+ * Forwards val as the dirent and args as the readdir_args to
+ * fill_readdir(); val_size is not used.
+ *
+ * Returns NGNFS_BTREE_ITER_CONTINUE when fill_readdir() returned 0,
+ * and 0 for any other result, so fill_readdir()'s nonzero code (e.g.
+ * -ENOBUFS) is not passed on to the iterator.
+ * NOTE(review): presumably a 0 return tells the iterator to stop
+ * without reporting an error -- confirm against ngnfs_btree_read_iter().
+ */
+ int ret;
+ ret = fill_readdir(key, val, args);
+
+ if (ret == 0)
+ return NGNFS_BTREE_ITER_CONTINUE;
+
+ return 0;
+}
+
+static int fill_from_inode(u64 hash, struct ngnfs_inode_txn_ref *inode, char *name, size_t name_len,
+ struct ngnfs_dirent *dent, struct readdir_args *ra)
+{ /*
+ * Synthesize a directory entry named name that refers to inode and
+ * append it to the readdir buffer described by ra.
+ *
+ * dent is caller-provided scratch space that is filled from the
+ * inode's ino, version and mode (converted with mode_to_type()).
+ * hash is placed, little-endian, in the first element of a temporary
+ * btree key whose remaining elements are 0; that key and dent are
+ * then passed to fill_readdir(), whose return value is returned.
+ */
+ struct ngnfs_btree_key key = { {cpu_to_le64(hash), 0, 0} };
+
+ fill_dent(inode->ninode->ino, inode->ninode->version,
+ mode_to_type(le32_to_cpu(inode->ninode->mode)), name, name_len, dent);
+
+ return fill_readdir(&key, dent, ra);
+}
+
+/*
+ * Fill in "." and ".." entries for readdir if it is the first read.
+ * Assumes that the buf in ra is large enough to hold both.
+ *
+ * "First read" means key->k[0] is 0; for any other starting key this
+ * returns 0 without touching ra. Otherwise "." is generated from dir,
+ * the parent inode named by dir->ninode->parent_ino is read with
+ * ngnfs_inode_get(NBF_READ), and ".." is generated from it.
+ *
+ * Returns 0 on success, after advancing key->k[0] to
+ * NGNFS_DIRENT_MIN_HASH so the btree iteration that follows starts at
+ * the first possible real entry. Otherwise returns the first nonzero
+ * result from fill_from_inode() or ngnfs_inode_get() and leaves key
+ * unchanged.
+ *
+ * NOTE(review): a starting key of NGNFS_DIRENT_DOT_DOT_HASH (1) also
+ * takes the early return, so ".." is not generated on that path --
+ * confirm whether a caller can resume readdir at position 1.
+ * NOTE(review): dent is a bare struct ngnfs_dirent on the stack and
+ * receives up to 2 name bytes -- confirm dent.name is at least that big.
+ */
+static int fill_dots(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
+ struct ngnfs_btree_key *key, struct readdir_args *ra,
+ struct ngnfs_inode_txn_ref *dir)
+{
+ struct ngnfs_inode_txn_ref parent_dir;
+ struct ngnfs_dirent dent;
+ u64 parent_ino;
+ int ret;
+
+ if ((le64_to_cpu(key->k[0]) != 0))
+ return 0;
+
+ parent_ino = le64_to_cpu(dir->ninode->parent_ino);
+
+ ret = fill_from_inode(NGNFS_DIRENT_DOT_HASH, dir, ".", 1, &dent, ra) ?:
+ ngnfs_inode_get(nfi, txn, NBF_READ, parent_ino, &parent_dir) ?:
+ fill_from_inode(NGNFS_DIRENT_DOT_DOT_HASH, &parent_dir, "..", 2, &dent, ra);
+
+ if (ret == 0)
+ key->k[0] = cpu_to_le64(NGNFS_DIRENT_MIN_HASH);
+
+ return ret;
}
/*
@@ -354,8 +436,9 @@ int ngnfs_dir_readdir(struct ngnfs_fs_info *nfi, u64 dir_ino, u64 pos,
ret = ngnfs_inode_get(nfi, &txn, NBF_READ, dir_ino, &dir) ?:
check_ifmt(dir.ninode, S_IFDIR, -ENOTDIR) ?:
+ fill_dots(nfi, &txn, &key, &ra, &dir) ?:
ngnfs_btree_read_iter(nfi, &txn, &dir.ninode->dirents, &key,
- NULL, NULL, fill_dirent_rd, &ra);
+ NULL, NULL, fill_readdir_rd, &ra);
} while (ngnfs_txn_retry(nfi, &txn, &ret));
diff --git a/shared/format-block.h b/shared/format-block.h
index b0bc995..49481fe 100644
--- a/shared/format-block.h
+++ b/shared/format-block.h
@@ -153,4 +153,9 @@ struct ngnfs_dirent {
*/
#define NGNFS_DIRENT_HASH_MASK (U64_MAX ^ (1ULL << 63) ^ NGNFS_DIRENT_COLL_BIT)
+/*
+ * reserved hash values for . and ..
+ *
+ * NGNFS_DIRENT_MIN_HASH is the smallest hash value that name_hash()
+ * returns for any other name: smaller results are clamped up to it so
+ * that they never land on the two reserved values.
+ */
+#define NGNFS_DIRENT_DOT_HASH 0ULL
+#define NGNFS_DIRENT_DOT_DOT_HASH 1ULL
+#define NGNFS_DIRENT_MIN_HASH 2ULL
+
#endif
--
2.48.1
More information about the ngnfs-devel
mailing list