[PATCH 16/53] ovl: drop dir lock for lookups in impure readdir
Amir Goldstein
amir73il at gmail.com
Sun Mar 15 06:51:27 PDT 2026
On Thu, Mar 12, 2026 at 10:49 PM NeilBrown <neilb at ownmail.net> wrote:
>
> From: NeilBrown <neil at brown.name>
>
> When performing an "impure" readdir, ovl needs to perform a lookup on some
> of the names that it found.
> With proposed locking changes it will not be possible to perform this
> lookup (in particular, not safe to wait for d_alloc_parallel()) while
> holding a lock on the directory.
>
> ovl doesn't really need the lock at this point.
Not exactly. see below.
> It has already iterated
> the directory and has cached a list of the contents. It now needs to
> gather extra information about some contents. It can do this without
> the lock.
>
> After gathering that info it needs to retake the lock for API
> correctness. After doing this it must check IS_DEADDIR() again to
> ensure readdir always returns -ENOENT on a removed directory.
>
> Note that while ->iterate_shared is called with a shared lock, ovl uses
> WRAP_DIR_ITER() so an exclusive lock is held and so we drop and retake
> that exclusive lock.
>
> As the directory is no longer locked in ovl_cache_update() we need
> dget_parent() to get a reference to the parent.
>
> Signed-off-by: NeilBrown <neil at brown.name>
> ---
> fs/overlayfs/readdir.c | 19 ++++++++++++-------
> 1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 1dcc75b3a90f..d5123b37921c 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -568,13 +568,12 @@ static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p,
> goto get;
> }
> if (p->len == 2) {
> - /* we shall not be moved */
> - this = dget(dir->d_parent);
> + this = dget_parent(dir);
> goto get;
> }
> }
> /* This checks also for xwhiteouts */
> - this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
> + this = lookup_one_unlocked(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
ovl_cache_update() is also called from ovl_iterate_merged() where inode
is locked.
> if (IS_ERR_OR_NULL(this) || !this->d_inode) {
> /* Mark a stale entry */
> p->is_whiteout = true;
> @@ -666,11 +665,12 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
> if (err)
> return err;
>
> + inode_unlock(path->dentry->d_inode);
> list_for_each_entry_safe(p, n, list, l_node) {
> if (!name_is_dot_dotdot(p->name, p->len)) {
> err = ovl_cache_update(path, p, true);
> if (err)
> - return err;
> + break;
> }
> if (p->ino == p->real_ino) {
> list_del(&p->l_node);
> @@ -680,14 +680,19 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
> struct rb_node *parent = NULL;
>
> if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
> - &newp, &parent)))
> - return -EIO;
> + &newp, &parent))) {
> + err = -EIO;
> + break;
> + }
>
> rb_link_node(&p->node, parent, newp);
> rb_insert_color(&p->node, root);
> }
> }
> - return 0;
> + inode_lock(path->dentry->d_inode);
> + if (IS_DEADDIR(path->dentry->d_inode))
> + err = -ENOENT;
> + return err;
> }
>
> static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
> --
You missed the fact that overlayfs uses the dir inode lock
to protect the readdir inode cache, so your patch introduces
a risk for storing a stale readdir cache when dir modify operations
invalidate the readdir cache version while lock is dropped
and also introduces memory leak when cache is stomped
without freeing cache created by a competing thread.
I think something like the untested patch below should fix this.
I did not look into ovl_iterate_merged() to see if it has a simple
fix and I am not 100% sure that this fix for impure dir is enough.
Thanks,
Amir.
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index d5123b37921c8..9e90064b252ce 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -702,15 +702,13 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
struct inode *inode = d_inode(dentry);
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct ovl_dir_cache *cache;
+ /* Snapshot version before ovl_dir_read_impure() drops i_rwsem */
+ u64 version = ovl_inode_version_get(inode);
cache = ovl_dir_cache(inode);
- if (cache && ovl_inode_version_get(inode) == cache->version)
+ if (cache && version == cache->version)
return cache;
- /* Impure cache is not refcounted, free it here */
- ovl_dir_cache_free(inode);
- ovl_set_dir_cache(inode, NULL);
-
cache = kzalloc_obj(struct ovl_dir_cache);
if (!cache)
return ERR_PTR(-ENOMEM);
@@ -721,6 +719,14 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
kfree(cache);
return ERR_PTR(res);
}
+
+ /*
+ * Impure cache is not refcounted, free it here.
+ * Also frees cache stored by concurrent readdir during i_rwsem drop.
+ */
+ ovl_dir_cache_free(inode);
+ ovl_set_dir_cache(inode, NULL);
+
if (list_empty(&cache->entries)) {
/*
* A good opportunity to get rid of an unneeded "impure" flag.
@@ -736,7 +742,7 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
return NULL;
}
- cache->version = ovl_inode_version_get(inode);
+ cache->version = version;
ovl_set_dir_cache(inode, cache);
return cache;
More information about the linux-afs
mailing list