[PATCH 2/2] ubifs: Fix ABBA deadlock between inode reclaiming and deleted inode writing
Zhihao Cheng
chengzhihao1 at huawei.com
Tue Jul 9 19:26:28 PDT 2024
The inodes reclaiming process(See function prune_icache_sb) collects
all reclaimable inodes and mark them with I_FREEING flag at first, at
that time, other processes will be stuck if they try getting these
inodes(See function find_inode_fast), then the reclaiming process
destroy the inodes.
In deleted inode writing function ubifs_jnl_write_inode(), UBIFS holds
BASEHD's wbuf->io_mutex while traversing all xattr inodes, which could
race with inodes reclaiming process(The reclaiming process could try
locking BASEHD's wbuf->io_mutex in inode evicting function), then an
ABBA deadlock problem would happens as following:
1. File A has inode ia and a xattr(with inode ixa), regular file B has
inode ib and a xattr.
2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa
3. Then, following three processes running like this:
PA PB PC
echo 2 > /proc/sys/vm/drop_caches
// ib and ia area added into lru, lru->ixa->ib->ia
shrink_slab
prune_icache_sb
list_lru_walk_one
inode_lru_isolate
ixa->inode->i_state |= I_FREEING // set inode state
inode_lru_isolate
__iget(ib)
spin_unlock(&ib->i_lock)
spin_unlock(lru_lock)
rm file B
ib->nlink = 0
iput(ib)
rm file A
iput(ia)
ubifs_evict_inode(ia)
ubifs_jnl_delete_inode(ia)
ubifs_jnl_write_inode(ia)
make_reservation(BASEHD) // Lock wbuf->io_mutex
ubifs_iget(ixa->i_ino)
iget_locked
find_inode_fast
__wait_on_freeing_inode(ixa)
| iput(ib) // ib->nlink is 0, do evict
| ubifs_evict_inode
| ubifs_jnl_delete_inode(ib)
↓ ubifs_jnl_write_inode
ABBA deadlock ←-----make_reservation(BASEHD)
dispose_list // cannot be executed by prune_icache_sb
wake_up_bit(&inode->i_state)
The root cause is that UBIFS tries getting xattr inode with holding
BASEHD's wbuf->io_mutex, so the problem can be fixed by getting all
xattr inodes before locking BASEHD's wbuf->io_mutex.
Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files")
Cc: stable at vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022
Signed-off-by: Zhihao Cheng <chengzhihao1 at huawei.com>
---
fs/ubifs/journal.c | 110 +++++++++++++++++++++++++++------------------
1 file changed, 67 insertions(+), 43 deletions(-)
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index a5c7c499cc29..7556d87934b7 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -971,7 +971,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
*/
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{
- int err, lnum, offs;
+ int err, lnum, offs, i = 0;
+ struct inode **xinos = NULL;
struct ubifs_ino_node *ino, *ino_start;
struct ubifs_inode *ui = ubifs_inode(inode);
int sync = 0, write_len = 0, ilen = UBIFS_INO_NODE_SZ;
@@ -982,44 +983,24 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
if (kill_xattrs) {
+ union ubifs_key key;
+ struct fscrypt_name nm = {0};
+ struct ubifs_dent_node *xent, *pxent = NULL;
+ struct super_block *sb = c->vfs_sb;
+
if (ui->xattr_cnt > ubifs_xattr_max_cnt(c)) {
ubifs_err(c, "Cannot delete inode, it has too much xattrs!");
ubifs_ro_mode(c, err);
return -EPERM;
}
- }
-
- /*
- * If the inode is being deleted, do not write the attached data. No
- * need to synchronize the write-buffer either.
- */
- if (!last_reference) {
- ilen += ui->data_len;
- sync = IS_SYNC(inode);
- } else if (kill_xattrs) {
- write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
- }
-
- if (ubifs_authenticated(c))
- write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
- else
- write_len += ilen;
- ino_start = ino = kmalloc(write_len, GFP_NOFS);
- if (!ino)
- return -ENOMEM;
-
- /* Make reservation before allocating sequence numbers */
- err = make_reservation(c, BASEHD, write_len);
- if (err)
- goto out_free;
-
- if (kill_xattrs) {
- union ubifs_key key;
- struct fscrypt_name nm = {0};
- struct inode *xino;
- struct ubifs_dent_node *xent, *pxent = NULL;
+ xinos = kmalloc(sizeof(*xinos) * ui->xattr_cnt, GFP_NOFS);
+ if (!xinos) {
+ ubifs_err(c, "Cannot allocate memory for xattr inodes");
+ return -ENOMEM;
+ }
+ i = 0;
lowest_xent_key(c, &key, inode->i_ino);
while (1) {
xent = ubifs_tnc_next_ent(c, &key, &nm);
@@ -1029,34 +1010,71 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
break;
kfree(pxent);
- goto out_release;
+ goto out_iput;
}
fname_name(&nm) = xent->name;
fname_len(&nm) = le16_to_cpu(xent->nlen);
- xino = ubifs_iget(c->vfs_sb, le64_to_cpu(xent->inum));
- if (IS_ERR(xino)) {
- err = PTR_ERR(xino);
+ xinos[i] = ubifs_iget(sb, le64_to_cpu(xent->inum));
+ if (IS_ERR(xinos[i])) {
+ err = PTR_ERR(xinos[i]);
ubifs_err(c, "dead directory entry '%s', error %d",
xent->name, err);
ubifs_ro_mode(c, err);
kfree(pxent);
kfree(xent);
- goto out_release;
+ goto out_iput;
}
- ubifs_assert(c, ubifs_inode(xino)->xattr);
-
- clear_nlink(xino);
- pack_inode(c, ino, xino, 0);
- ino = (void *)ino + UBIFS_INO_NODE_SZ;
- iput(xino);
+ ubifs_assert(c, ubifs_inode(xinos[i])->xattr);
+ ubifs_assert(c, i < ui->xattr_cnt);
kfree(pxent);
pxent = xent;
key_read(c, &xent->key, &key);
+ i++;
}
kfree(pxent);
+ ubifs_assert(c, i == ui->xattr_cnt);
+ }
+
+ /*
+ * If the inode is being deleted, do not write the attached data. No
+ * need to synchronize the write-buffer either.
+ */
+ if (!last_reference) {
+ ilen += ui->data_len;
+ sync = IS_SYNC(inode);
+ } else if (kill_xattrs) {
+ write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
+ }
+
+ if (ubifs_authenticated(c))
+ write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
+ else
+ write_len += ilen;
+
+ ino_start = ino = kmalloc(write_len, GFP_NOFS);
+ if (!ino) {
+ err = -ENOMEM;
+ goto out_iput;
+ }
+
+ /* Make reservation before allocating sequence numbers */
+ err = make_reservation(c, BASEHD, write_len);
+ if (err)
+ goto out_free;
+
+ if (kill_xattrs) {
+ for (i = 0; i < ui->xattr_cnt; i++) {
+ clear_nlink(xinos[i]);
+ pack_inode(c, ino, xinos[i], 0);
+ ino = (void *)ino + UBIFS_INO_NODE_SZ;
+ iput(xinos[i]);
+ }
+
+ kfree(xinos);
+ xinos = NULL;
}
pack_inode(c, ino, inode, 1);
@@ -1103,6 +1121,12 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
finish_reservation(c);
out_free:
kfree(ino_start);
+out_iput:
+ if (xinos) {
+ while (--i >= 0)
+ iput(xinos[i]);
+ kfree(xinos);
+ }
return err;
}
--
2.39.2
More information about the linux-mtd
mailing list