[PATCH v4 07/30] netfs: Replace wb_lock with a bit lock for asynchronicity

David Howells dhowells at redhat.com
Tue Jun 16 03:07:56 PDT 2026


The netfs_inode::wb_lock mutex is used to prevent multiple simultaneous
writebacks from fighting each other (a writeback thread will write multiple
discontiguous regions within the same request).  The mutex, however, only
serialises the issuing of subrequests; it doesn't serialise the collection
of results, and, in particular, the updating of file size information and
fscache populatedness data.

Unfortunately, the mutex cannot be held around the entire process as it has
to be unlocked in the same thread in which it is locked - and we don't want
to hold up the allocator whilst we complete the writeback.

Fix this by replacing the mutex with a bit flag and a list of lock waiters
so that the lock can be dropped in the collector thread after collection is
complete.

Link: https://sashiko.dev/#/patchset/20260608145432.681865-1-dhowells%40redhat.com
Signed-off-by: David Howells <dhowells at redhat.com>
cc: Paulo Alcantara <pc at manguebit.org>
cc: netfs at lists.linux.dev
cc: linux-fsdevel at vger.kernel.org
---
 fs/afs/symlink.c         |  4 +-
 fs/netfs/locking.c       | 95 ++++++++++++++++++++++++++++++++++++++++
 fs/netfs/write_collect.c |  2 +
 fs/netfs/write_issue.c   | 36 ++++-----------
 include/linux/netfs.h    | 11 ++++-
 5 files changed, 116 insertions(+), 32 deletions(-)

diff --git a/fs/afs/symlink.c b/fs/afs/symlink.c
index ed5868369f37..16b4823cb7b7 100644
--- a/fs/afs/symlink.c
+++ b/fs/afs/symlink.c
@@ -255,11 +255,11 @@ int afs_symlink_writepages(struct address_space *mapping,
 	}
 
 	if (ret == 0) {
-		mutex_lock(&vnode->netfs.wb_lock);
+		netfs_wb_begin(&vnode->netfs, false);
 		netfs_free_folioq_buffer(vnode->directory);
 		vnode->directory = NULL;
 		vnode->directory_size = 0;
-		mutex_unlock(&vnode->netfs.wb_lock);
+		netfs_wb_end(&vnode->netfs);
 	} else if (ret == 1) {
 		ret = 0; /* Skipped write due to lock conflict. */
 	}
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 2249ecd09d0a..4e3be2b81504 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -9,6 +9,11 @@
 #include <linux/netfs.h>
 #include "internal.h"
 
+struct netfs_wb_waiter {
+	struct list_head	link;		/* Link in ictx->wb_queue */
+	struct task_struct	*waiter;	/* Waiter task; cleared when lock granted */
+};
+
 /*
  * inode_dio_wait_interruptible - wait for outstanding DIO requests to finish
  * @inode: inode to wait for
@@ -203,3 +208,93 @@ void netfs_end_io_direct(struct inode *inode)
 	up_read(&inode->i_rwsem);
 }
 EXPORT_SYMBOL(netfs_end_io_direct);
+
+/*
+ * Wait to have exclusive access to writeback.
+ */
+static bool netfs_wb_begin_wait(struct netfs_inode *ictx)
+{
+	struct netfs_wb_waiter waiter = {};
+	struct task_struct *tsk = current;
+	bool got = false;
+
+	spin_lock(&ictx->lock);
+
+	if (test_and_set_bit_lock(NETFS_ICTX_WB_LOCK, &ictx->flags)) {
+		get_task_struct(tsk);
+		waiter.waiter = tsk;
+		list_add_tail(&waiter.link, &ictx->wb_queue);
+	} else {
+		got = true;
+	}
+	spin_unlock(&ictx->lock);
+
+	if (!got) {
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			/* Read waiter before accessing inode state. */
+			if (smp_load_acquire(&waiter.waiter) == NULL)
+				break;
+			schedule();
+		}
+	}
+	__set_current_state(TASK_RUNNING);
+	return true;
+}
+
+/**
+ * netfs_wb_begin - Begin writeback, waiting if need be
+ * @ictx: The inode to get writeback access on
+ * @nowait: Return failure immediately rather than waiting if true
+ *
+ * Begin writeback to an inode, waiting for exclusive access if @nowait is
+ * false.  This prevents collection from being done out of order with respect
+ * to the issuance of write subrequests.
+ *
+ * Note that writeback may be ended in a different process (e.g. the collection
+ * function on a workqueue) than started it.
+ *
+ * Return: True if can proceed, false if denied.
+ */
+bool netfs_wb_begin(struct netfs_inode *ictx, bool nowait)
+{
+	if (!test_and_set_bit_lock(NETFS_ICTX_WB_LOCK, &ictx->flags))
+		return true;
+	if (nowait) {
+		netfs_stat(&netfs_n_wb_lock_skip);
+		return false;
+	}
+	netfs_stat(&netfs_n_wb_lock_wait);
+	return netfs_wb_begin_wait(ictx);
+}
+EXPORT_SYMBOL(netfs_wb_begin);
+
+/* netfs_wb_end - End writeback
+ * @ictx: The inode we have writeback access to
+ *
+ * End writeback access on an inode, waking up the next writeback request.
+ */
+void netfs_wb_end(struct netfs_inode *ictx)
+{
+	struct netfs_wb_waiter *waiter;
+	struct task_struct *tsk;
+
+	WARN_ON_ONCE(!test_bit(NETFS_ICTX_WB_LOCK, &ictx->flags));
+
+	spin_lock(&ictx->lock);
+
+	waiter = list_first_entry_or_null(&ictx->wb_queue, struct netfs_wb_waiter, link);
+	if (waiter) {
+		list_del(&waiter->link);
+		tsk = waiter->waiter;
+		/* Write inode state before clearing waiter. */
+		smp_store_release(&waiter->waiter, NULL);
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	} else {
+		clear_bit_unlock(NETFS_ICTX_WB_LOCK, &ictx->flags);
+	}
+
+	spin_unlock(&ictx->lock);
+}
+EXPORT_SYMBOL(netfs_wb_end);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 24fc2bb2f8a4..61e2d1e8891e 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -408,6 +408,8 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
 	netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
 	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
 
+	netfs_wb_end(ictx);
+
 	if (wreq->iocb) {
 		size_t written = min(wreq->transferred, wreq->len);
 		wreq->iocb->ki_pos += written;
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index c03c7cc45e47..3454e8b0c248 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -551,14 +551,8 @@ int netfs_writepages(struct address_space *mapping,
 	struct folio *folio;
 	int error = 0;
 
-	if (!mutex_trylock(&ictx->wb_lock)) {
-		if (wbc->sync_mode == WB_SYNC_NONE) {
-			netfs_stat(&netfs_n_wb_lock_skip);
-			return 0;
-		}
-		netfs_stat(&netfs_n_wb_lock_wait);
-		mutex_lock(&ictx->wb_lock);
-	}
+	if (!netfs_wb_begin(ictx, wbc->sync_mode == WB_SYNC_NONE))
+		return 0;
 
 	/* Need the first folio to be able to set up the op. */
 	folio = writeback_iter(mapping, wbc, NULL, &error);
@@ -593,8 +587,6 @@ int netfs_writepages(struct address_space *mapping,
 	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
 
 	netfs_end_issue_write(wreq);
-
-	mutex_unlock(&ictx->wb_lock);
 	netfs_wake_collector(wreq);
 
 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
@@ -604,7 +596,7 @@ int netfs_writepages(struct address_space *mapping,
 couldnt_start:
 	netfs_kill_dirty_pages(mapping, wbc, folio);
 out:
-	mutex_unlock(&ictx->wb_lock);
+	netfs_wb_end(ictx);
 	_leave(" = %d", error);
 	return error;
 }
@@ -618,12 +610,12 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len
 	struct netfs_io_request *wreq = NULL;
 	struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));
 
-	mutex_lock(&ictx->wb_lock);
+	netfs_wb_begin(ictx, false);
 
 	wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
 				      iocb->ki_pos, NETFS_WRITETHROUGH);
 	if (IS_ERR(wreq)) {
-		mutex_unlock(&ictx->wb_lock);
+		netfs_wb_end(ictx);
 		return wreq;
 	}
 
@@ -685,7 +677,6 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
 ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
 			       struct folio *writethrough_cache)
 {
-	struct netfs_inode *ictx = netfs_inode(wreq->inode);
 	ssize_t ret;
 
 	_enter("R=%x", wreq->debug_id);
@@ -699,8 +690,6 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c
 
 	netfs_end_issue_write(wreq);
 
-	mutex_unlock(&ictx->wb_lock);
-
 	if (wreq->iocb)
 		ret = -EIOCBQUEUED;
 	else
@@ -847,16 +836,8 @@ int netfs_writeback_single(struct address_space *mapping,
 	if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
 		return -EIO;
 
-	if (!mutex_trylock(&ictx->wb_lock)) {
-		if (wbc->sync_mode == WB_SYNC_NONE) {
-			/* The VFS will have undirtied the inode. */
-			netfs_single_mark_inode_dirty(&ictx->inode);
-			netfs_stat(&netfs_n_wb_lock_skip);
-			return 1;
-		}
-		netfs_stat(&netfs_n_wb_lock_wait);
-		mutex_lock(&ictx->wb_lock);
-	}
+	if (!netfs_wb_begin(ictx, wbc->sync_mode == WB_SYNC_NONE))
+		return 1;
 
 	wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
 	if (IS_ERR(wreq)) {
@@ -893,7 +874,6 @@ int netfs_writeback_single(struct address_space *mapping,
 	smp_wmb(); /* Write lists before ALL_QUEUED. */
 	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
 
-	mutex_unlock(&ictx->wb_lock);
 	netfs_wake_collector(wreq);
 
 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
@@ -901,7 +881,7 @@ int netfs_writeback_single(struct address_space *mapping,
 	return ret;
 
 couldnt_start:
-	mutex_unlock(&ictx->wb_lock);
+	netfs_wb_end(ictx);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 243c0f737938..06e6cceffaeb 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -61,14 +61,16 @@ struct netfs_inode {
 #if IS_ENABLED(CONFIG_FSCACHE)
 	struct fscache_cookie	*cache;
 #endif
-	struct mutex		wb_lock;	/* Writeback serialisation */
+	struct list_head	wb_queue;	/* Queue of processes wanting to do writeback */
 	loff_t			_remote_i_size;	/* Size of the remote file */
 	loff_t			_zero_point;	/* Size after which we assume there's no data
 						 * on the server */
+	spinlock_t		lock;		/* Lock covering wb_queue */
 	atomic_t		io_count;	/* Number of outstanding reqs */
 	unsigned long		flags;
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
+#define NETFS_ICTX_WB_LOCK	2		/* Writeback serialisation lock */
 #define NETFS_ICTX_MODIFIED_ATTR 3		/* Indicate change in mtime/ctime */
 #define NETFS_ICTX_SINGLE_NO_UPLOAD 4		/* Monolithic payload, cache but no upload */
 };
@@ -462,6 +464,10 @@ int netfs_alloc_folioq_buffer(struct address_space *mapping,
 			      size_t *_cur_size, ssize_t size, gfp_t gfp);
 void netfs_free_folioq_buffer(struct folio_queue *fq);
 
+/* Writeback exclusion API. */
+bool netfs_wb_begin(struct netfs_inode *ictx, bool nowait);
+void netfs_wb_end(struct netfs_inode *ictx);
+
 /**
  * netfs_inode - Get the netfs inode context from the inode
  * @inode: The inode to query
@@ -743,7 +749,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
-	mutex_init(&ctx->wb_lock);
+	INIT_LIST_HEAD(&ctx->wb_queue);
+	spin_lock_init(&ctx->lock);
 	/* ->releasepage() drives zero_point */
 	if (use_zero_point) {
 		ctx->_zero_point = ctx->_remote_i_size;




More information about the linux-afs mailing list