[PATCH 20/23] Add set/get/remove xattrs with up to 503 bytes of name + value

Zach Brown zab at zabbo.net
Wed Apr 9 17:08:41 PDT 2025


On Fri, Apr 04, 2025 at 08:45:36PM +0200, Valerie Aurora wrote:
> Use the native btree items to implement short xattrs of up to 503
> bytes including both name and value (no null terminators). Add the
> debugfs commands to set, get, and remove xattrs with create and
> replace flags.


> +struct ngnfs_xattr {
> +	__le16 val_len;
> +	__u8 name_len;
> +	__u8 __pad[5];
> +	__u8 name[];
> +};

Like dirents, the name doesn't need to be aligned.  Get rid of the __pad
field and make the size of the name array pad out the struct to be u64
aligned.  As stored it might well be smaller (or more likely) larger. 

> +/*
> + * Maximum size of both xattr name and value added together.
> + *
> + * TODO: support much larger xattrs by storing in blocks instead of
> + * btree items.
> + */
> +#define NGNFS_XATTR_MAX_SIZE NGNFS_BTREE_MAX_VAL_SIZE

I won't quibble now, as we still need to finish this by adding the
larger value blocks, but these kinds of limits that are present in the
persistent structures should be in format-block.h. 

> +
> +static u64 xattr_hash(void *name, size_t name_len)
> +{
> +	/* XXX using the same #defines as dirents, should be different? */
> +	return xxh64(name, name_len, NGNFS_DIRENT_HASH_SEED) & NGNFS_DIRENT_HASH_MASK;
> +}

Yeah, it might as well have its own seed.  It's hard to imagine it
mattering, but best practice says we shouldn't share that kind of thing.
(And we'll be getting rid of the mask and collision bit, more soon!)

> +static size_t xattr_size(size_t name_len, size_t val_size) {
> +	return offsetof(struct ngnfs_xattr, name) + name_len + val_size;
> +}

Opening '{' on newline.

> +static void init_xattr_key(struct ngnfs_btree_key *key, u64 hash)
> +{
> +	*key = (struct ngnfs_btree_key) {
> +		.k[0] = cpu_to_le64(hash),
> +	};
> +}

Maybe this is a good place to talk about name hash collisions for
xattrs.  They're not like dirents.  Over in dirent land, we're using the
same storage for lookup and readdir.  Our lookup keys are limited by
readdir and f_pos.   So we mash the collision bit into a single value. 

With xattrs, we don't have to do that.  We have the full precision keys
to work with.  So, yes, we can have one key value be the hash of the
name.  But we can also make another key value be unique to each xattr by
having a counter that increments in the inode for each stored xattr.
It's used by insertion to avoid name hash collsions.  Lookup then
iterates over {hash,0}..{hash,~0} rather than {hash}..{hash|1} like it
does with dirents.

This lets us get rid of all the collision bit nonsense in the xattrs.

> +static int fill_xattr_rd(struct ngnfs_btree_key *key, void *val, size_t val_size, void *arg)
> +{
> +	struct xattr_args *xa = arg;
> +	struct ngnfs_xattr *xattr = val;
> +
> +	if (!xattr_names_equal(xattr->name, xattr->name_len, (u8 *) xa->name, xa->name_len))
> +		return NGNFS_BTREE_ITER_CONTINUE;
> +
> +	if (le16_to_cpu(xattr->val_len) > xa->val_size)
> +		return -ENOBUFS;
> +
> +	memcpy(xa->value, xattr->name + xattr->name_len, le16_to_cpu(xattr->val_len));
> +	xa->val_size = le16_to_cpu(xattr->val_len);

(I'd assign val_size first then use it as the argument to the memcpy(),
but it's trivial, take it or leave it.)

> +static int get_xattr(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
> +		     struct ngnfs_inode_txn_ref *ino, struct xattr_args *xa)
> +{
> +	struct ngnfs_btree_key key;
> +	struct ngnfs_btree_key last;
> +	int ret;
> +
> +	init_xattr_key(&key, xa->hash);
> +	init_xattr_key(&last, xa->hash | NGNFS_DIRENT_COLL_BIT);

With a second key field for unique xattr ids, this'd become:

	init_xattr_key(&key, xa->hash, 0);
	init_xattr_key(&last, xa->hash, U64_MAX);

> +ssize_t ngnfs_xattr_get(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig, char *name,
> +			void *value, size_t val_size)
> +{
> +	struct ngnfs_transaction txn;
> +	struct ngnfs_inode_txn_ref ino;
> +	struct xattr_args xa;
> +	size_t name_len;
> +	int ret;
> +
> +	name_len = strlen(name);
> +	if (name_len > XATTR_NAME_MAX)
> +		return -ERANGE;
> +
> +	ngnfs_txn_init(&txn);
> +	init_xattr_args(&xa, &ino, name, name_len, value, val_size, NULL, 0);
> +
> +	do {
> +		ret = ngnfs_inode_get(nfi, &txn, NBF_READ, ig, &ino) 			?:
> +		      get_xattr(nfi, &txn, &ino, &xa);
> +
> +	} while (ngnfs_txn_retry(nfi, &txn, &ret));

It makes me a bit nervous that we're not re-initializing the args each
time we start a retried loop.  In this case, I think xa->found can be
set in a previous iteration and then mistakenly trusted by a retry that
didn't find anything.

Do we want to establish a best-practice to always re-initialize?  I
think I'd rather end up worrying about performance impacts of excessive
re-initialization than subtle errors from stale arg data from previous
iterations.

> +static int insert_xattr_wr(struct ngnfs_btree_key *key, void *val, size_t val_size, void *arg,
> +			   struct ngnfs_btree_op *op)
> +{
> +	struct xattr_args *xa = arg;
> +	struct ngnfs_xattr *xattr = val;
> +
> +	if (xattr) {
> +		if (xattr_names_equal(xattr->name, xattr->name_len, (u8 *) xa->name, xa->name_len))
> +			return -EEXIST;
> +
> +		if (xa->hash == le64_to_cpu(key->k[0])) {
> +			if (xa->hash & NGNFS_DIRENT_COLL_BIT)
> +				return -ENOSPC;
> +			xa->hash |= NGNFS_DIRENT_COLL_BIT;
> +			return NGNFS_BTREE_ITER_CONTINUE;
> +		}
> +	}
> +
> +	op->insert = 1;
> +	op->val = xa->xattr;
> +	op->val_size = xa->xa_size;
> +	init_xattr_key(&op->key, xa->hash);
> +
> +	return 0;
> +}

If we got rid of the collision bit then insertion could always insert
after it has iterated over all the matching hash values without seeing a
matching name.  This would only work if the xattr id is strictly
increasing so each new xattr would always be inserted last.  That seems
doable (and worth adding comments for :)).

> +/*
> + * Use the most efficient btree operation to implement setxattr
> + * depending on which flags are set. Replace means only set if it
> + * exists, create means only set if it does NOT exist, and no flags mean
> + * set it regardless of whether it exists.
> + */
> +static int set_xattr(struct ngnfs_fs_info *nfi, struct ngnfs_transaction *txn,
> +		     struct xattr_args *xa)
> +{
> +	int ret;
> +
> +	if (xa->flags & XATTR_REPLACE)
> +		ret = replace_xattr(nfi, txn, xa);
> +	else if (xa->flags & XATTR_CREATE)
> +		ret = insert_xattr(nfi, txn, xa);
> +	else {
> +		ret = remove_xattr(nfi, txn, xa);
> +		if (ret < 0 && ret != -ENODATA)
> +			goto out;
> +		ret = insert_xattr(nfi, txn, xa);
> +	}
> +out:
> +	return ret;
> +}

Instead of testing the flags here and kicking off iterations with
callbacks for each flag variant, let's use a single iteration and test
the flags in the callback.  Should reduce the code by quite a bit.

> +int ngnfs_xattr_set(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig, char *name,
> +		    void *value, size_t val_size, int flags)
> +{
> +	struct ngnfs_transaction txn;
> +	struct ngnfs_inode_txn_ref ino;
> +	struct xattr_args xa;
> +	struct ngnfs_xattr *xattr;
> +	size_t xa_size;
> +	size_t name_len;
> +	int ret;
> +
> +	name_len = strlen(name);
> +	if (name_len > XATTR_NAME_MAX)
> +		return -ERANGE;
> +
> +	xa_size = xattr_size(name_len, val_size);
> +	if (xa_size > NGNFS_XATTR_MAX_SIZE)
> +		return -ERANGE;
> +
> +	xattr = kmalloc(xa_size, GFP_NOFS);
> +	if (!xattr)
> +		return -ENOMEM;
> +
> +	init_xattr_args(&xa, &ino, name, name_len, value, val_size, xattr, flags);
> +
> +	ngnfs_txn_init(&txn);
> +
> +	do {
> +		ret = ngnfs_inode_get(nfi, &txn, NBF_WRITE, ig, &ino) 			?:
> +		      set_xattr(nfi, &txn, &xa);
> +
> +	} while (ngnfs_txn_retry(nfi, &txn, &ret));

We should enforce the listxattr size limit on xattr name creation so
that we can't create names that can't be seen unless other xattrs are
deleted.  We'll need an inode field, sadly.

> +ssize_t ngnfs_xattr_get(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig, char *name,
> +			void *value, size_t val_size);
> +int ngnfs_xattr_remove(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig, char *name);
> +int ngnfs_xattr_set(struct ngnfs_fs_info *nfi, struct ngnfs_inode_ino_gen *ig, char *name,
> +		    void *value, size_t val_size, int flags);

Once of these things is not like the other :).  Given our small total
key and value size limits, let's stick to int return types for all these
functions.

- z



More information about the ngnfs-devel mailing list