[PATCH 10/23] Add auto-generated "." and ".." entries to directories

Zach Brown zab at zabbo.net
Wed Apr 9 13:11:11 PDT 2025


On Fri, Apr 04, 2025 at 08:45:26PM +0200, Valerie Aurora wrote:
> There are pros and cons to real or fake "." and ".." dentries, but we
> decided to go with fake because we don't have to do a potentially slow
> lookup of ".." in large directories. Userspace likes "." and ".." to
> come first in readdir(), and we return entries in hash value order, so
> reserve 0 and 1 in the hash output space for "." and "..". Then
> autogenerate them in readdir() and lookup() using the parent inode
> back reference. Also add an optional position argument to the debugfs
> readdir() command to test correctness of handling readdir() position.


> +/*
> + * Readdir helpers to fill in "." and ".."
> + */
> +static void fill_dot_dirent(u64 pos, struct ngnfs_dirent *dent, struct ngnfs_inode_txn_ref *dir)
> +{
> +	dent->pers_dtype = NGNFS_DT_DIR;
> +	dent->name[0] = '.';
> +
> +	if (pos == NGNFS_DIRENT_DOT_HASH) {
> +		dent->name_len = 1;
> +		dent->ig = dir->ninode->ig;
> +	} else {
> +		dent->ig = dir->ninode->parent_ig;
> +		dent->name[1] = '.';
> +		dent->name_len = 2;
> +	}
> +}
> +
> +static int fill_dots(struct ngnfs_btree_key *key, struct readdir_args *ra,
> +		     struct ngnfs_inode_txn_ref *dir)
> +{
> +	struct ngnfs_dirent dent;
> +	int ret;
> +
> +	if (le64_to_cpu(key->k[0]) >= NGNFS_DIRENT_MIN_HASH)
> +		return 0;
> +
> +	fill_dot_dirent(le64_to_cpu(key->k[0]), &dent, dir);
> +	ret = fill_readdir_rd(key, &dent, offsetof(struct ngnfs_dirent, name) + dent.name_len, ra);
> +	BUG_ON(ret == 0); /* 0 means out of space, should never happen due to min buf size */
> +
> +	if (ret == NGNFS_BTREE_ITER_CONTINUE) {
> +		key->k[0] = cpu_to_le64(le64_to_cpu(key->k[0]) + 1);
> +		ret = 0;
> +	}
> +
> +	return ret;
> +}
> +
> +static int fill_dot_dirents(struct ngnfs_btree_key *key, struct readdir_args *ra,
> +			    struct ngnfs_inode_txn_ref *dir)
> +{
> +	int ret;
> +
> +	/* call once to fill "." and once for ".." */
> +	ret = fill_dots(key, ra, dir) ?:
> +	      fill_dots(key, ra, dir);
> +
> +	return ret;
> +}
> +
>  /*
>   * Read directory entries starting at the given position, filling
>   * entries in the buffer until entries are exhausted or the buffer is
> @@ -359,8 +438,9 @@ int ngnfs_dir_readdir(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *dir
>  
>  		ret = ngnfs_inode_get(nfi, &txn, NBF_READ, dir_ig, &dir)			?:
>  		      check_ifmt(dir.ninode, S_IFDIR, -ENOTDIR)					?:
> +		      fill_dot_dirents(&key, &ra, &dir)						?:
>  		      ngnfs_btree_read_iter(nfi, &txn, &dir.ninode->dirents, &key,
> -					    NULL, NULL, fill_dirent_rd, &ra);
> +					    NULL, NULL, fill_readdir_rd, &ra);
>  
>  	} while (ngnfs_txn_retry(nfi, &txn, &ret));
>  

It took me a while to wrap my brain around why all this code made me
uneasy.  It's functionally injecting two synthetic dent btree items, but
doing it with another call chain.  It was the shadow second call to
fill_readdir_rd that made it click for me.

We should refactor this into an ngnfs_btree_read_iter wrapper that adds
iter function calls with the fake dots dirents, if needed.  Then we
just call it instead of the normal btree read iter and drop the
fill_dot_dirents sequence.

Launch all pseudo code!

static int dots_and_dents_read_iter(nfi, txn, root, dir, key, next, last, iter, iter_arg)
{
	struct ngnfs_btree_key tmp_key = *key;
	struct ngnfs_dirent dots;

	while (tmp_key.k[0] <= _DOT_DOT_HASH && tmp_key.k[0] <= last->k[0]) {
		dots = (struct ngnfs_dirent) {
			.type = DIR,
			.name_len = tmp_key.k[0] == _DOT_HASH ? 1 : 2,
			.ig = tmp_key.k[0] == _DOT_HASH ? dir->n->ig : dir->n->parent_ig,
			.name[0] = '.',
			.name[1] = '.',
		};

		ret = iter(&tmp_key, &dots, offsetof(dots.name[dots.name_len]), iter_arg);
		if (ret != _CONTINUE)
			goto out;

		tmp_key.k[0]++;
	}

	ret = ngnfs_btree_read_iter(nfi, txn, root, &tmp_key, next, last, iter, iter_arg);
out:
	return ret;
}

This feels cleaner because it's clearly a read iteration but with the
two fake dots entries added in, when needed.  Critically, it should let
readdir and lookup both use this instead of each having their own
synthetic dot injection.

(The redundant dots initialization is a little irritating, but we have
to touch most of it in both cases anyway so.. meh, simpler this way.
And it'd be fun to see if compilers notice and care enough to hoist the
stores out of the loop :))

- z



More information about the ngnfs-devel mailing list