[PATCH 52/53] VFS: lift d_alloc_parallel above inode_lock
NeilBrown
neilb at ownmail.net
Thu Mar 12 14:12:39 PDT 2026
From: NeilBrown <neil at brown.name>
d_alloc_parallel() can block waiting on a d_in_lookup() dentry
so it is important to order it consistently with other blocking locks
such as inode_lock().
Currently d_alloc_parallel() is ordered after inode_lock(): it can be
called while the inode is locked, and so the inode cannot be locked
while a d_in_lookup() dentry is held.
This patch reverses that order. d_alloc_parallel() must now be called
*before* locking the directory, and must not be called afterwards. This
allows directory locking to be moved closer to the filesystem
operations, and ultimately into those operations.
lookup_one_qstr_excl() is now called without a lock held, exclusive or
otherwise, so the "_excl" is dropped - it is now lookup_one_qstr().
As a lock is taken *after* lookup, start_dirop() and start_renaming()
must ensure, if the dentry isn't d_in_lookup(), that after the lock is
taken the parent is still correct and the dentry is still hashed.
lookup_one_qstr() and lookup_slow() don't need to re-check the parent as
the dentry is always d_in_lookup() so the parent cannot change.
The locking in lookup_slow() is moved into __lookup_slow() immediately
before/after ->lookup, and lookup_slow() just sets the task state for
waiting.
Parent locking is removed from open_last_lookups() and performed in
lookup_open(). A shared lock is taken if ->lookup() needs to be called.
An exclusive lock is taken separately if ->create() needs to be called -
with checks that the dentry hasn't become positive.
If ->atomic_open is needed we take an exclusive or shared parent lock as
appropriate and check for a positive dentry or DEAD parent.
The fsnotify_create() call is kept inside the locked region in
lookup_open(). I don't know if this is important.
Signed-off-by: NeilBrown <neil at brown.name>
---
fs/namei.c | 239 ++++++++++++++++++++++++++++++++++-------------------
1 file changed, 154 insertions(+), 85 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index bba419f2fc53..3d213070a515 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1773,8 +1773,19 @@ static struct dentry *lookup_dcache(const struct qstr *name,
return dentry;
}
+static inline bool inode_lock_shared_state(struct inode *inode, unsigned int state)
+{
+ if (state == TASK_KILLABLE) {
+ if (down_read_killable(&inode->i_rwsem) != 0) {
+ return false;
+ }
+ } else {
+ inode_lock_shared(inode);
+ }
+ return true;
+}
+
/*
- * Parent directory has inode locked.
* If Lookup_EXCL or LOOKUP_RENAME_TARGET is set
* d_lookup_done() must be called before the dentry is dput()
* If the dentry is not d_in_lookup():
@@ -1783,8 +1794,9 @@ static struct dentry *lookup_dcache(const struct qstr *name,
* If it is d_in_lookup() then these conditions can only be checked by the
* file system when carrying out the intent (create or rename).
*/
-static struct dentry *lookup_one_qstr_excl(const struct qstr *name,
- struct dentry *base, unsigned int flags)
+static struct dentry *lookup_one_qstr(const struct qstr *name,
+ struct dentry *base, unsigned int flags,
+ unsigned int state)
{
struct dentry *dentry;
struct dentry *old;
@@ -1806,7 +1818,16 @@ static struct dentry *lookup_one_qstr_excl(const struct qstr *name,
/* Raced with another thread which did the lookup */
goto found;
- old = dir->i_op->lookup(dir, dentry, flags);
+ if (!inode_lock_shared_state(dir, state)) {
+ d_lookup_done(dentry);
+ dput(dentry);
+ return ERR_PTR(-EINTR);
+ }
+ if (unlikely(IS_DEADDIR(dir)))
+ old = ERR_PTR(-ENOENT);
+ else
+ old = dir->i_op->lookup(dir, dentry, flags | LOOKUP_SHARED);
+ inode_unlock_shared(dir);
if (unlikely(old)) {
d_lookup_done(dentry);
dput(dentry);
@@ -1897,7 +1918,8 @@ static struct dentry *lookup_fast(struct nameidata *nd)
/* Fast lookup failed, do it the slow way */
static struct dentry *__lookup_slow(const struct qstr *name,
struct dentry *dir,
- unsigned int flags)
+ unsigned int flags,
+ unsigned int state)
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
@@ -1920,8 +1942,17 @@ static struct dentry *__lookup_slow(const struct qstr *name,
dput(dentry);
dentry = ERR_PTR(error);
}
+ } else if (!inode_lock_shared_state(inode, state)) {
+ d_lookup_done(dentry);
+ dput(dentry);
+ return ERR_PTR(-EINTR);
} else {
- old = inode->i_op->lookup(inode, dentry, flags);
+ if (unlikely(IS_DEADDIR(inode)))
+ old = ERR_PTR(-ENOENT);
+ else
+ old = inode->i_op->lookup(inode, dentry,
+ flags | LOOKUP_SHARED);
+ inode_unlock_shared(inode);
d_lookup_done(dentry);
if (unlikely(old)) {
dput(dentry);
@@ -1935,26 +1966,14 @@ static noinline struct dentry *lookup_slow(const struct qstr *name,
struct dentry *dir,
unsigned int flags)
{
- struct inode *inode = dir->d_inode;
- struct dentry *res;
- inode_lock_shared(inode);
- res = __lookup_slow(name, dir, flags | LOOKUP_SHARED);
- inode_unlock_shared(inode);
- return res;
+ return __lookup_slow(name, dir, flags | LOOKUP_SHARED, TASK_NORMAL);
}
static struct dentry *lookup_slow_killable(const struct qstr *name,
struct dentry *dir,
unsigned int flags)
{
- struct inode *inode = dir->d_inode;
- struct dentry *res;
-
- if (inode_lock_shared_killable(inode))
- return ERR_PTR(-EINTR);
- res = __lookup_slow(name, dir, flags | LOOKUP_SHARED);
- inode_unlock_shared(inode);
- return res;
+ return __lookup_slow(name, dir, flags | LOOKUP_SHARED, TASK_KILLABLE);
}
static inline int may_lookup(struct mnt_idmap *idmap,
@@ -2908,18 +2927,26 @@ static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name,
struct dentry *dentry;
struct inode *dir = d_inode(parent);
- if (state == TASK_KILLABLE) {
- int ret = down_write_killable_nested(&dir->i_rwsem,
- I_MUTEX_PARENT);
- if (ret)
- return ERR_PTR(ret);
- } else {
- inode_lock_nested(dir, I_MUTEX_PARENT);
- }
- dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
- if (IS_ERR(dentry))
+ while(1) {
+ dentry = lookup_one_qstr(name, parent, lookup_flags, state);
+ if (IS_ERR(dentry))
+ return dentry;
+ if (state == TASK_KILLABLE) {
+ if (down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT) != 0) {
+ d_lookup_done(dentry);
+ dput(dentry);
+ return ERR_PTR(-EINTR);
+ }
+ } else {
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ }
+ if (d_in_lookup(dentry) ||
+ (!d_unhashed(dentry) && dentry->d_parent == parent))
+ return dentry;
inode_unlock(dir);
- return dentry;
+ d_lookup_done(dentry);
+ dput(dentry);
+ }
}
/**
@@ -3830,26 +3857,37 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
if (rd->flags & RENAME_NOREPLACE)
target_flags |= LOOKUP_EXCL;
- trap = lock_rename(rd->old_parent, rd->new_parent);
- if (IS_ERR(trap))
- return PTR_ERR(trap);
-
- d1 = lookup_one_qstr_excl(old_last, rd->old_parent,
- lookup_flags);
+retry:
+ d1 = lookup_one_qstr(old_last, rd->old_parent,
+ lookup_flags, TASK_NORMAL);
err = PTR_ERR(d1);
if (IS_ERR(d1))
- goto out_unlock;
+ goto out_err;
- d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
- lookup_flags | target_flags);
+ d2 = lookup_one_qstr(new_last, rd->new_parent,
+ lookup_flags | target_flags, TASK_NORMAL);
err = PTR_ERR(d2);
if (IS_ERR(d2))
goto out_dput_d1;
+ trap = lock_rename(rd->old_parent, rd->new_parent);
+ err = PTR_ERR(trap);
+ if (IS_ERR(trap))
+ goto out_unlock;
+
+ if (unlikely((!d_in_lookup(d1) && d_unhashed(d1)) || d1->d_parent != rd->old_parent ||
+ (!d_in_lookup(d2) && d_unhashed(d2)) || d2->d_parent != rd->new_parent)) {
+ unlock_rename(rd->old_parent, rd->new_parent);
+ d_lookup_done(d1); dput(d1);
+ d_lookup_done(d2); dput(d2);
+ dput(trap);
+ goto retry;
+ }
+
if (d1 == trap) {
/* source is an ancestor of target */
err = -EINVAL;
- goto out_dput_d2;
+ goto out_unlock;
}
if (d2 == trap) {
@@ -3858,7 +3896,7 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
err = -EINVAL;
else
err = -ENOTEMPTY;
- goto out_dput_d2;
+ goto out_unlock;
}
rd->old_dentry = d1;
@@ -3866,14 +3904,14 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
dget(rd->old_parent);
return 0;
-out_dput_d2:
+out_unlock:
+ unlock_rename(rd->old_parent, rd->new_parent);
d_lookup_done(d2);
dput(d2);
out_dput_d1:
d_lookup_done(d1);
dput(d1);
-out_unlock:
- unlock_rename(rd->old_parent, rd->new_parent);
+out_err:
return err;
}
@@ -3927,10 +3965,22 @@ __start_renaming_dentry(struct renamedata *rd, int lookup_flags,
if (rd->flags & RENAME_NOREPLACE)
target_flags |= LOOKUP_EXCL;
- /* Already have the dentry - need to be sure to lock the correct parent */
+retry:
+ d2 = lookup_one_qstr(new_last, rd->new_parent,
+ lookup_flags | target_flags, TASK_NORMAL);
+ err = PTR_ERR(d2);
+ if (IS_ERR(d2))
+ goto out_unlock;
+
+ /*
+ * Already have the old_dentry - need to be sure to lock
+ * the correct parent
+ */
trap = lock_rename_child(old_dentry, rd->new_parent);
+ err = PTR_ERR(trap);
if (IS_ERR(trap))
- return PTR_ERR(trap);
+ goto out_dput_d2;
+
if (d_unhashed(old_dentry) ||
(rd->old_parent && rd->old_parent != old_dentry->d_parent)) {
/* dentry was removed, or moved and explicit parent requested */
@@ -3938,16 +3988,19 @@ __start_renaming_dentry(struct renamedata *rd, int lookup_flags,
goto out_unlock;
}
- d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
- lookup_flags | target_flags);
- err = PTR_ERR(d2);
- if (IS_ERR(d2))
- goto out_unlock;
+ if (unlikely((!d_in_lookup(d2) && d_unhashed(d2)) ||
+ d2->d_parent != rd->new_parent)) {
+ /* d2 was moved/removed before lock - repeat lookup */
+ unlock_rename(old_dentry->d_parent, rd->new_parent);
+ d_lookup_done(d2); dput(d2);
+ dput(trap);
+ goto retry;
+ }
if (old_dentry == trap) {
/* source is an ancestor of target */
err = -EINVAL;
- goto out_dput_d2;
+ goto out_unlock;
}
if (d2 == trap) {
@@ -3956,7 +4009,7 @@ __start_renaming_dentry(struct renamedata *rd, int lookup_flags,
err = -EINVAL;
else
err = -ENOTEMPTY;
- goto out_dput_d2;
+ goto out_unlock;
}
rd->old_dentry = dget(old_dentry);
@@ -3964,11 +4017,11 @@ __start_renaming_dentry(struct renamedata *rd, int lookup_flags,
rd->old_parent = dget(old_dentry->d_parent);
return 0;
+out_unlock:
+ unlock_rename(old_dentry->d_parent, rd->new_parent);
out_dput_d2:
d_lookup_done(d2);
dput(d2);
-out_unlock:
- unlock_rename(old_dentry->d_parent, rd->new_parent);
return err;
}
@@ -4319,8 +4372,19 @@ static struct dentry *atomic_open(const struct path *path, struct dentry *dentry
file->__f_path.dentry = DENTRY_NOT_SET;
file->__f_path.mnt = path->mnt;
- error = dir->i_op->atomic_open(dir, dentry, file,
- open_to_namei_flags(open_flag), mode);
+
+ if (open_flag & O_CREAT)
+ inode_lock(dir);
+ else
+ inode_lock_shared(dir);
+ if (dentry->d_inode)
+ error = finish_no_open(file, NULL);
+ else if (unlikely(IS_DEADDIR(dir)))
+ error = -ENOENT;
+ else
+ error = dir->i_op->atomic_open(dir, dentry, file,
+ open_to_namei_flags(open_flag),
+ mode);
d_lookup_done(dentry);
if (!error) {
if (file->f_mode & FMODE_OPENED) {
@@ -4339,6 +4403,13 @@ static struct dentry *atomic_open(const struct path *path, struct dentry *dentry
error = -ENOENT;
}
}
+ if (!error && (file->f_mode & FMODE_CREATED))
+ fsnotify_create(dir, dentry);
+ if (open_flag & O_CREAT)
+ inode_unlock(dir);
+ else
+ inode_unlock_shared(dir);
+
if (error) {
dput(dentry);
dentry = ERR_PTR(error);
@@ -4372,10 +4443,6 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
- unsigned int shared_flag = (op->open_flag & O_CREAT) ? 0 : LOOKUP_SHARED;
-
- if (unlikely(IS_DEADDIR(dir_inode)))
- return ERR_PTR(-ENOENT);
file->f_mode &= ~FMODE_CREATED;
dentry = d_lookup(dir, &nd->last);
@@ -4420,7 +4487,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
if (open_flag & O_CREAT) {
if (open_flag & O_EXCL)
open_flag &= ~O_TRUNC;
- mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode);
+ mode = vfs_prepare_mode(idmap, dir_inode, mode, mode, mode);
if (likely(got_write))
create_error = may_o_create(idmap, &nd->path,
dentry, mode);
@@ -4439,8 +4506,15 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
}
if (d_in_lookup(dentry)) {
- struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
- nd->flags | shared_flag);
+ struct dentry *res;
+
+ inode_lock_shared(dir_inode);
+ if (IS_DEADDIR(dir_inode))
+ res = ERR_PTR(-ENOENT);
+ else
+ res = dir_inode->i_op->lookup(dir_inode, dentry,
+ nd->flags | LOOKUP_SHARED);
+ inode_unlock_shared(dir_inode);
d_lookup_done(dentry);
if (unlikely(res)) {
if (IS_ERR(res)) {
@@ -4459,15 +4533,22 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
if (error)
goto out_dput;
- file->f_mode |= FMODE_CREATED;
- audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
- if (!dir_inode->i_op->create) {
- error = -EACCES;
- goto out_dput;
- }
+ inode_lock(dir_inode);
+ if (!dentry->d_inode && !unlikely(IS_DEADDIR(dir_inode))) {
+ file->f_mode |= FMODE_CREATED;
+ audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
+ if (!dir_inode->i_op->create) {
+ error = -EACCES;
+ goto out_dput;
+ }
- error = dir_inode->i_op->create(idmap, dir_inode, dentry,
- mode, open_flag & O_EXCL);
+ error = dir_inode->i_op->create(idmap, dir_inode, dentry,
+ mode, open_flag & O_EXCL);
+ if (!error)
+ fsnotify_create(dir_inode, dentry);
+ } else if (!dentry->d_inode)
+ error = -ENOENT;
+ inode_unlock(dir_inode);
if (error)
goto out_dput;
}
@@ -4522,7 +4603,6 @@ static const char *open_last_lookups(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
struct delegated_inode delegated_inode = { };
- struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool got_write = false;
struct dentry *dentry;
@@ -4562,22 +4642,11 @@ static const char *open_last_lookups(struct nameidata *nd,
* dropping this one anyway.
*/
}
- if (open_flag & O_CREAT)
- inode_lock(dir->d_inode);
- else
- inode_lock_shared(dir->d_inode);
dentry = lookup_open(nd, file, op, got_write, &delegated_inode);
if (!IS_ERR(dentry)) {
- if (file->f_mode & FMODE_CREATED)
- fsnotify_create(dir->d_inode, dentry);
if (file->f_mode & FMODE_OPENED)
fsnotify_open(file);
}
- if (open_flag & O_CREAT)
- inode_unlock(dir->d_inode);
- else
- inode_unlock_shared(dir->d_inode);
-
if (got_write)
mnt_drop_write(nd->path.mnt);
--
2.50.0.107.gf914562f5916.dirty
More information about the linux-afs
mailing list