[RFC PATCH 05/17] netfs: Add some tools for managing bvecq chains

Wed Mar 4 06:03:12 PST 2026

Provide a selection of tools for managing bvec queue chains.  This
includes:

 (1) Allocation, prepopulation, expansion, shortening and refcounting of
     bvecqs and bvecq chains.

     This can be used to do things like creating an encryption buffer in
     cifs or a directory content buffer in afs.  The memory segments will
     be appropriately disposed of according to the flags on the bvecq.

 (2) Management of a bvecq chain as a rolling buffer and the management of
     positions within it.

 (3) Loading folios, slicing chains and clearing content.

Signed-off-by: David Howells <dhowells at redhat.com>
cc: Paulo Alcantara <pc at manguebit.org>
cc: Matthew Wilcox <willy at infradead.org>
cc: Christoph Hellwig <hch at infradead.org>
cc: linux-cifs at vger.kernel.org
cc: netfs at lists.linux.dev
cc: linux-fsdevel at vger.kernel.org
---
 fs/netfs/Makefile            |   1 +
 fs/netfs/bvecq.c             | 634 +++++++++++++++++++++++++++++++++++
 fs/netfs/internal.h          |  87 +++++
 fs/netfs/stats.c             |   4 +-
 include/linux/netfs.h        |  24 ++
 include/trace/events/netfs.h |  24 ++
 6 files changed, 773 insertions(+), 1 deletion(-)
 create mode 100644 fs/netfs/bvecq.c

diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index b43188d64bd8..e1f12ecb5abf 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -3,6 +3,7 @@
 netfs-y := \
 	buffered_read.o \
 	buffered_write.o \
+	bvecq.o \
 	direct_read.o \
 	direct_write.o \
 	iterator.o \
diff --git a/fs/netfs/bvecq.c b/fs/netfs/bvecq.c
new file mode 100644
index 000000000000..e223beb6661b
--- /dev/null
+++ b/fs/netfs/bvecq.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Buffering helpers for bvec queues
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells at redhat.com)
+ */
+
+#include "internal.h"
+
+/* Dump a bvecq chain to the kernel log; leading empty slots are summarised. */
+void dump_bvecq(const struct bvecq *bq)
+{
+	for (int node = 0; bq; bq = bq->next, node++) {
+		int state = 0; /* 0: nothing yet, 1: blanks skipped, 2: data shown */
+
+		pr_notice("BQ[%u] %u/%u fp=%llx\n",
+			  node, bq->nr_segs, bq->max_segs, bq->fpos);
+		for (int i = 0; i < bq->nr_segs; i++) {
+			const struct bio_vec *seg = &bq->bv[i];
+			struct page *page = seg->bv_page;
+
+			if (state != 2 && !page && !seg->bv_len) {
+				state = 1;
+				continue;
+			}
+			if (state == 1)
+				pr_notice("BQ[%u:00-%02u] ...\n", node, i - 1);
+			state = 2;
+			pr_notice("BQ[%u:%02u] %10lx %04x %04x %u\n",
+				  node, i, page ? page_to_pfn(page) : 0,
+				  seg->bv_offset, seg->bv_len,
+				  page ? page_count(page) : 0);
+		}
+	}
+}
+
+/*
+ * Allocate one bvecq node (capped at 512 bytes) and initialise its header.
+ */
+struct bvecq *netfs_alloc_one_bvecq(size_t nr_slots, gfp_t gfp)
+{
+	struct bvecq *node;
+	const size_t slot_size = sizeof(node->__bv[0]);
+	const size_t cap = (512 - sizeof(*node)) / slot_size;
+	size_t bytes = roundup_pow_of_two(struct_size(node, __bv, umin(nr_slots, cap)));
+
+	node = kmalloc(bytes, gfp);
+	if (!node)
+		return NULL;
+
+	*node = (struct bvecq) {
+		.ref		= REFCOUNT_INIT(1),
+		.bv		= node->__bv,
+		.inline_bv	= true,
+		.max_segs	= (bytes - sizeof(*node)) / slot_size,
+	};
+	netfs_stat(&netfs_n_bvecq);
+	return node;
+}
+
+/**
+ * netfs_alloc_bvecq - Allocate an unpopulated bvec queue
+ * @nr_slots: Number of slots to allocate
+ * @gfp: The allocation constraints.
+ *
+ * Build a doubly-linked chain of empty bvecq nodes whose combined capacity is
+ * at least @nr_slots.  At least one node is always allocated, even if
+ * @nr_slots is zero.
+ *
+ * Return: the head of the chain, or NULL if an allocation failed.
+ */
+struct bvecq *netfs_alloc_bvecq(size_t nr_slots, gfp_t gfp)
+{
+	struct bvecq *first = NULL, *last = NULL, *node;
+
+	_enter("%zu", nr_slots);
+
+	for (;;) {
+		node = netfs_alloc_one_bvecq(nr_slots, gfp);
+		if (!node)
+			break;
+		node->prev = last;
+		if (last)
+			last->next = node;
+		else
+			first = node;
+		last = node;
+
+		/* Stop once this node covers what is still outstanding. */
+		if (node->max_segs >= nr_slots)
+			return first;
+		nr_slots -= node->max_segs;
+	}
+
+	/* Allocation failed part-way: discard what was built so far. */
+	netfs_free_bvecq_buffer(first);
+	return NULL;
+}
+EXPORT_SYMBOL(netfs_alloc_bvecq);
+
+/**
+ * netfs_alloc_bvecq_buffer - Allocate buffer space into a bvec queue
+ * @size: Target size of the buffer (can be 0 for an empty buffer)
+ * @pre_slots: Number of preamble slots to set aside
+ * @gfp: The allocation constraints.
+ */
+struct bvecq *netfs_alloc_bvecq_buffer(size_t size, unsigned int pre_slots, gfp_t gfp)
+{
+	struct bvecq *head = NULL, *tail = NULL, *p = NULL;
+	size_t count = DIV_ROUND_UP(size, PAGE_SIZE);
+
+	_enter("%zx,%zx,%u", size, count, pre_slots);
+	/* NOTE(review): the slot request below ignores pre_slots - confirm it fits. */
+	do {
+		struct page **pages;
+		int want, got;
+
+		p = netfs_alloc_one_bvecq(umin(count, 32 - 3), gfp);
+		if (!p)
+			goto oom;
+		/* Flag the node so that freeing it also disposes of its pages. */
+		p->free = true;
+
+		if (tail) {
+			tail->next = p;
+			p->prev = tail;
+		} else {
+			head = p;
+		}
+		tail = p;
+		if (!count)
+			break;
+		/* Stage the page pointers at the tail end of the bv[] allocation. */
+		pages = (struct page **)&p->bv[p->max_segs];
+		pages -= p->max_segs - pre_slots;
+
+		want = umin(count, p->max_segs - pre_slots);
+		got = alloc_pages_bulk(gfp, want, pages);
+		if (got < want) {
+			for (int i = 0; i < got; i++)
+				__free_page(pages[i]);
+			goto oom;
+		}
+		/* Populate the slots that follow the preamble, one page apiece. */
+		tail->nr_segs = pre_slots + got;
+		for (int i = 0; i < got; i++) {
+			int j = pre_slots + i;
+
+			set_page_count(pages[i], 1);
+			bvec_set_page(&tail->bv[j], pages[i], PAGE_SIZE, 0);
+		}
+
+		count -= got;
+		pre_slots = 0; /* Only the first node carries a preamble. */
+	} while (count > 0);
+
+	return head;
+oom:
+	netfs_free_bvecq_buffer(head);
+	return NULL;
+}
+EXPORT_SYMBOL(netfs_alloc_bvecq_buffer);
+
+/**
+ * netfs_expand_bvecq_buffer - Grow a bvec queue buffer up to a target size
+ * @_buffer: Pointer to the bvec queue to add to (may point to a NULL; updated).
+ * @_cur_size: Current size of the buffer (updated).
+ * @size: Target size of the buffer (rounded up to a whole number of pages).
+ * @gfp: The allocation constraints.
+ * Return: 0 on success or -ENOMEM; pages added before a failure are kept.
+ */
+int netfs_expand_bvecq_buffer(struct bvecq **_buffer, size_t *_cur_size, ssize_t size, gfp_t gfp)
+{
+	struct bvecq *tail = *_buffer, *p;
+	const size_t max_segs = 32;
+
+	size = round_up(size, PAGE_SIZE);
+	if (*_cur_size >= size)
+		return 0;
+
+	while (tail && tail->next)
+		tail = tail->next;
+
+	do {
+		struct page *page;
+		int order = 0;
+
+		if (!tail || bvecq_is_full(tail)) {
+			p = netfs_alloc_one_bvecq(max_segs, gfp);
+			if (!p)
+				return -ENOMEM;
+			if (tail) {
+				tail->next = p;
+				p->prev = tail;
+			} else {
+				*_buffer = p;
+			}
+			tail = p;
+		}
+
+		if (size - *_cur_size > PAGE_SIZE)
+			order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT, MAX_PAGECACHE_ORDER);
+		/* On falling back to a single page, account for just that page. */
+		page = alloc_pages(gfp | __GFP_COMP, order);
+		if (!page && order > 0) {
+			order = 0;
+			page = alloc_pages(gfp | __GFP_COMP, 0);
+		}
+		if (!page)
+			return -ENOMEM;
+		/* Append to tail; p is only valid if a node was just allocated. */
+		bvec_set_page(&tail->bv[tail->nr_segs++], page, PAGE_SIZE << order, 0);
+		*_cur_size += PAGE_SIZE << order;
+	} while (*_cur_size < size);
+
+	return 0;
+}
+EXPORT_SYMBOL(netfs_expand_bvecq_buffer);
+
+static void netfs_bvecq_free_seg(struct bvecq *bq, unsigned int seg)
+{
+	struct page *page = bq->bv[seg].bv_page;
+
+	if (!page || !bq->free)
+		return;	/* Empty slot, or the queue isn't flagged to free pages. */
+	if (bq->unpin)
+		unpin_user_page(page);
+	else
+		__free_page(page);
+}
+
+/**
+ * netfs_free_bvecq_buffer - Free a bvec queue
+ * @bq: The start of the bvec queue to free (may be NULL)
+ *
+ * Free every node in the chain, ignoring refcounts, and the pages it owns.
+ */
+void netfs_free_bvecq_buffer(struct bvecq *bq)
+{
+	while (bq) {
+		struct bvecq *dead = bq;
+
+		bq = dead->next;
+		for (int i = 0; i < dead->nr_segs; i++)
+			netfs_bvecq_free_seg(dead, i);
+		netfs_stat_d(&netfs_n_bvecq);
+		kfree(dead);
+	}
+}
+EXPORT_SYMBOL(netfs_free_bvecq_buffer);
+
+/**
+ * netfs_put_bvecq - Put a bvec queue
+ * @bq: The first node of the bvec queue to put (may be NULL)
+ *
+ * Drop a ref on @bq.  If that was the last ref, the node and the page
+ * fragments it points to are freed and the same is then done to the next
+ * node in the chain, stopping at the first node that still has refs.
+ */
+void netfs_put_bvecq(struct bvecq *bq)
+{
+	while (bq && refcount_dec_and_test(&bq->ref)) {
+		struct bvecq *dead = bq;
+
+		/* Pick up the link before the node is handed to kfree(). */
+		bq = dead->next;
+		for (int i = 0; i < dead->nr_segs; i++)
+			netfs_bvecq_free_seg(dead, i);
+		netfs_stat_d(&netfs_n_bvecq);
+		kfree(dead);
+	}
+}
+EXPORT_SYMBOL(netfs_put_bvecq);
+
+/**
+ * netfs_shorten_bvecq_buffer - Shorten a bvec queue buffer
+ * @bq: The start of the buffer to shorten
+ * @seg: The slot to start from
+ * @size: The size to retain
+ *
+ * Keep @size bytes of content counting from slot @seg of @bq, trimming the
+ * segment in which that runs out and releasing all segments and nodes after
+ * it.  Returns 0, or -EMSGSIZE (with a warning) if the queue holds less.
+ */
+int netfs_shorten_bvecq_buffer(struct bvecq *bq, unsigned int seg, size_t size)
+{
+	ssize_t left = size;
+	unsigned int i = seg;
+
+	/* Find the segment in which the amount to be retained runs out. */
+	for (; bq; bq = bq->next, i = 0) {
+		for (; i < bq->nr_segs; i++) {
+			left -= bq->bv[i].bv_len;
+			if (left < 0)
+				goto trim;
+		}
+	}
+
+	/* Ran off the end: that's only an error if the queue fell short. */
+	return WARN_ON_ONCE(left > 0) ? -EMSGSIZE : 0;
+
+trim:
+	/* left is minus this segment's surplus, so this trims the segment. */
+	bq->bv[i].bv_len += left;
+	/* Dispose of the segments that follow it in this node. */
+	for (unsigned int j = i + 1; j < bq->nr_segs; j++)
+		netfs_bvecq_free_seg(bq, j);
+	bq->nr_segs = i + 1;
+
+	/* Free the queue tail. */
+	netfs_free_bvecq_buffer(bq->next);
+	bq->next = NULL;
+	return 0;
+}
+
+/*
+ * Initialise a rolling buffer.  We allocate an empty bvecq struct so that
+ * the pointers can be independently driven by the producer and the consumer.
+ */
+int bvecq_buffer_init(struct bvecq_pos *pos, unsigned int rreq_id)
+{
+	/* Note that rreq_id is not currently used here. */
+	struct bvecq *first = netfs_alloc_bvecq(14, GFP_NOFS);
+
+	if (!first)
+		return -ENOMEM;
+
+	*pos = (struct bvecq_pos) {
+		.bvecq = first,	/* Comes with a ref. */
+	};
+	return 0;
+}
+
+/*
+ * Append a fresh segment to the rolling buffer and move @pos on to it, either
+ * because the old one is full or because of a discontiguity.  0 or -ENOMEM.
+ */
+int bvecq_buffer_make_space(struct bvecq_pos *pos)
+{
+	struct bvecq *bq, *head = pos->bvecq;
+
+	bq = netfs_alloc_bvecq(14, GFP_NOFS);
+	if (!bq)
+		return -ENOMEM;
+	bq->prev = head;
+	/* Take a second ref for pos; the allocation's ref goes with head->next. */
+	pos->bvecq = netfs_get_bvecq(bq);
+	pos->slot = 0;
+	pos->offset = 0;
+
+	/* Make sure the initialisation is stored before the next pointer.
+	 *
+	 * [!] NOTE: After we set head->next, the consumer is at liberty to
+	 * immediately delete the old head.
+	 */
+	smp_store_release(&head->next, bq);
+	netfs_put_bvecq(head); /* pos no longer points at the old head. */
+	return 0;
+}
+
+/*
+ * Advance a bvecq position by the given amount of data, stopping early if the
+ * end of the chain is reached.  pos->bvecq is never left NULL.
+ */
+void bvecq_pos_advance(struct bvecq_pos *pos, size_t amount)
+{
+	struct bvecq *bq = pos->bvecq;
+	unsigned int slot = pos->slot;
+	size_t offset = pos->offset, remain;
+
+	for (;;) {
+		/* Step into the next node, but stay put at the end of the chain. */
+		if (slot >= bq->nr_segs) {
+			if (!bq->next)
+				break;
+			bq = bq->next;
+			slot = 0;
+			offset = 0;
+			continue;
+		}
+		if (!amount)
+			break;
+		/* Stop inside this segment only if it outlasts what's left to skip. */
+		remain = bq->bv[slot].bv_len - offset;
+		if (amount < remain) {
+			offset += amount;
+			break;
+		}
+		amount -= remain;
+		offset = 0;
+		slot++;
+	}
+
+	pos->slot   = slot;
+	pos->offset = offset;
+	bvecq_pos_move(pos, bq);
+}
+
+/*
+ * Zero up to @amount bytes at the cursor, moving it on; returns bytes zeroed.
+ */
+ssize_t bvecq_zero(struct bvecq_pos *pos, size_t amount)
+{
+	struct bvecq *node = pos->bvecq;
+	unsigned int idx = pos->slot;
+	size_t off = pos->offset;
+	ssize_t done = 0;
+
+	if (WARN_ON_ONCE(!node))
+		return 0;
+
+	/* A cursor parked past the last slot resumes at the start of the next node. */
+	if (idx >= node->nr_segs) {
+		node = node->next;
+		idx = 0;
+		if (WARN_ON_ONCE(!node))
+			return 0;
+	}
+
+	do {
+		const struct bio_vec *seg = &node->bv[idx];
+
+		if (off < seg->bv_len) {
+			size_t chunk = umin(amount - done, seg->bv_len - off);
+			memzero_page(seg->bv_page, seg->bv_offset + off, chunk);
+			off += chunk;
+			done += chunk;
+		}
+
+		/* Still inside this segment: the loop condition ends the run. */
+		if (off < seg->bv_len)
+			continue;
+		off = 0;
+		idx++;
+		if (idx < node->nr_segs)
+			continue;
+		if (!node->next)
+			break;
+		node = node->next;
+		idx = 0;
+	} while (done < amount);
+
+	bvecq_pos_move(pos, node);
+	pos->slot = idx;
+	pos->offset = off;
+	return done;
+}
+
+/*
+ * Work out how much data, and how many segments, the next slice of a bvec
+ * queue would hold given caps on both its size and its segment count.  The
+ * cursor is left at the end of the slice and the size is returned.
+ */
+size_t bvecq_slice(struct bvecq_pos *pos, size_t max_size,
+		   unsigned int max_segs, unsigned int *_nr_segs)
+{
+	struct bvecq *node = pos->bvecq;
+	unsigned int idx = pos->slot, count = 0;
+	size_t off = pos->offset, total = 0;
+
+	for (;; node = node->next, idx = 0) {
+		for (; idx < node->nr_segs; idx++, off = 0) {
+			const struct bio_vec *seg = &node->bv[idx];
+			size_t chunk;
+
+			/* Exhausted and unpopulated slots contribute nothing. */
+			if (off >= seg->bv_len || !seg->bv_page)
+				continue;
+
+			chunk = umin(seg->bv_len - off, max_size);
+			total += chunk;
+			off += chunk;
+			max_size -= chunk;
+			count++;
+			if (!max_size || count >= max_segs)
+				goto done;
+		}
+
+		/* pos->bvecq isn't allowed to go NULL as the queue may get
+		 * extended and we would lose our place.
+		 */
+		if (!node->next)
+			break;
+	}
+
+done:
+	*_nr_segs = count;
+	/* If the scan stopped just past the last slot of a node that has a
+	 * successor, park the cursor at the start of that successor.
+	 */
+	if (idx == node->nr_segs && node->next) {
+		node = node->next;
+		idx = 0;
+		off = 0;
+	}
+	bvecq_pos_move(pos, node);
+	pos->slot = idx;
+	pos->offset = off;
+	return total;
+}
+
+/*
+ * Transcribe up to @amount bytes / @max_segs segments (0 = unlimited) from @pos
+ * into a newly allocated chain at *@to, advancing @pos.  Returns bytes or -errno.
+ */
+ssize_t bvecq_extract(struct bvecq_pos *pos, size_t amount,
+		      unsigned int max_segs, struct bvecq **to)
+{
+	struct bvecq_pos tmp_pos;
+	struct bvecq *src, *dst = NULL;
+	unsigned int slot = pos->slot, nsegs;
+	ssize_t extracted = 0;
+	size_t offset = pos->offset;
+
+	*to = NULL;
+	if (!max_segs)
+		max_segs = UINT_MAX;
+
+	bvecq_pos_attach(&tmp_pos, pos);
+	amount = bvecq_slice(&tmp_pos, amount, max_segs, &nsegs);
+	bvecq_pos_detach(&tmp_pos);
+	if (nsegs == 0)
+		return -EIO;
+
+	dst = netfs_alloc_bvecq(nsegs, GFP_KERNEL);
+	if (!dst)
+		return -ENOMEM;
+	*to = dst;
+
+	/* Transcribe the segments; the pages are shared, no refs are taken. */
+	src = pos->bvecq;
+	for (;;) {
+		for (; slot < src->nr_segs; slot++) {
+			const struct bio_vec *sv = &src->bv[slot];
+
+			_debug("EXTR sl=%x off=%zx am=%zx p=%lx",
+			       slot, offset, amount, page_to_pfn(sv->bv_page));
+
+			if (offset < sv->bv_len && sv->bv_page) {
+				size_t part = umin(sv->bv_len - offset, amount);
+				struct bio_vec *dv = &dst->bv[dst->nr_segs];
+
+				bvec_set_page(dv, sv->bv_page, part,
+					      sv->bv_offset + offset);
+				extracted += part;
+				amount -= part;
+				offset += part;
+				trace_netfs_bv_slot(dst, dst->nr_segs);
+				dst->nr_segs++;
+				if (bvecq_is_full(dst))
+					dst = dst->next;
+				if (--nsegs == 0) /* All segments in the slice done */
+					goto out;
+				if (amount == 0)
+					goto out;
+			}
+			offset = 0;
+		}
+
+		/* pos->bvecq isn't allowed to go NULL as the queue may get
+		 * extended and we would lose our place.
+		 */
+		if (!src->next)
+			break;
+		slot = 0;
+		src = src->next;
+	}
+
+out:
+	if (slot == src->nr_segs && src->next) {
+		src = src->next;
+		slot = 0;
+		offset = 0;
+	}
+	bvecq_pos_move(pos, src);
+	pos->slot = slot;
+	pos->offset = offset;
+	return extracted;
+}
+
+/*
+ * Decant part of the list of folios to read onto a bvecq.  The list must be
+ * pre-seeded with a bvecq object.  Returns the bytes loaded or -ENOMEM.
+ */
+ssize_t bvecq_load_from_ra(struct bvecq_pos *pos,
+			   struct readahead_control *ractl,
+			   struct folio_batch *put_batch)
+{
+	struct folio **folios;
+	struct bvecq *bq = pos->bvecq;
+	unsigned int space;
+	ssize_t loaded = 0;
+	int nr;
+
+	if (bvecq_is_full(bq)) {
+		bq = netfs_alloc_bvecq(14, GFP_NOFS);
+		if (!bq)
+			return -ENOMEM;
+		bq->prev = pos->bvecq;
+	}
+
+	space = bq->max_segs - bq->nr_segs;
+	/* Have the batch deposited at the tail end of the bv[] allocation. */
+	folios = (struct folio **)(bq->bv + bq->max_segs);
+	folios -= space;
+
+	nr = __readahead_batch(ractl, (struct page **)folios, space);
+	_enter("%u/%u %u/%u", bq->nr_segs, bq->max_segs, nr, space);
+
+	if (nr > 0) /* folios[0] is only set if the batch isn't empty */
+		bq->fpos = folio_pos(folios[0]);
+
+	for (int i = 0; i < nr; i++) {
+		struct folio *folio = folios[i];
+		size_t len = folio_size(folio);
+
+		loaded += len;
+		bvec_set_folio(&bq->bv[bq->nr_segs + i], folio, len, 0);
+
+		trace_netfs_folio(folio, netfs_folio_trace_read);
+		if (!folio_batch_add(put_batch, folio))
+			folio_batch_release(put_batch);
+	}
+
+	/* Update the counter after setting the slots. */
+	smp_store_release(&bq->nr_segs, bq->nr_segs + nr);
+
+	if (bq != pos->bvecq) {
+		/* Write the next pointer after initialisation. */
+		smp_store_release(&pos->bvecq->next, bq);
+		bvecq_pos_move(pos, bq);
+	}
+	return loaded;
+}
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index d436e20d3418..89ebeb49e969 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -33,6 +33,92 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
 void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
 			 loff_t pos, size_t copied);
 
+/*
+ * bvecq.c
+ */
+struct bvecq *netfs_alloc_one_bvecq(size_t nr_slots, gfp_t gfp);
+int bvecq_buffer_init(struct bvecq_pos *pos, unsigned int rreq_id);
+int bvecq_buffer_make_space(struct bvecq_pos *pos);
+void bvecq_pos_advance(struct bvecq_pos *pos, size_t amount);
+ssize_t bvecq_zero(struct bvecq_pos *pos, size_t amount);
+size_t bvecq_slice(struct bvecq_pos *pos, size_t max_size,
+		   unsigned int max_segs, unsigned int *_nr_segs);
+ssize_t bvecq_extract(struct bvecq_pos *pos, size_t amount,
+		      unsigned int max_segs, struct bvecq **to);
+ssize_t bvecq_load_from_ra(struct bvecq_pos *pos,
+			   struct readahead_control *ractl,
+			   struct folio_batch *put_batch);
+
+struct bvecq *netfs_get_bvecq(struct bvecq *bq);
+
+static inline void bvecq_pos_attach(struct bvecq_pos *pos, const struct bvecq_pos *at)
+{
+	netfs_get_bvecq(at->bvecq);	/* The copy needs a ref of its own. */
+	*pos = *at;
+}
+
+/* Release the ref held by a position and reset the position to empty. */
+static inline void bvecq_pos_detach(struct bvecq_pos *pos)
+{
+	netfs_put_bvecq(pos->bvecq);
+
+	*pos = (struct bvecq_pos) { .bvecq = NULL };
+}
+
+/* Hand a position, and the ref it holds, over from @from to @pos. */
+static inline void bvecq_pos_transfer(struct bvecq_pos *pos, struct bvecq_pos *from)
+{
+	*pos = *from;
+	/* The ref now belongs to @pos, so @from must forget the node. */
+	*from = (struct bvecq_pos) { .bvecq = NULL };
+}
+
+static inline void bvecq_pos_move(struct bvecq_pos *pos, struct bvecq *to)
+{
+	struct bvecq *from = pos->bvecq;
+
+	if (from == to)
+		return;	/* Already there; the refs stay as they are. */
+	pos->bvecq = netfs_get_bvecq(to);
+	netfs_put_bvecq(from);
+}
+
+static inline bool bvecq_buffer_step(struct bvecq_pos *pos)
+{
+	struct bvecq *bq = pos->bvecq;
+
+	pos->offset = 0;
+	if (++pos->slot < bq->nr_segs)
+		return true;	/* Still on a populated slot of this node. */
+	if (!bq->next)
+		return false;	/* End of the chain; the position stays in bq. */
+	pos->slot = 0;		/* Slot numbering restarts in the next node. */
+	bvecq_pos_move(pos, bq->next);
+	return true;
+}
+
+static inline struct bvecq *bvecq_buffer_delete_spent(struct bvecq_pos *pos)
+{
+	struct bvecq *spent = pos->bvecq;
+	/* Read the contents of the queue segment after the pointer to it. */
+	struct bvecq *next = smp_load_acquire(&spent->next);
+
+	if (!next)
+		return NULL; /* Nothing follows yet, so the position stays put. */
+	next->prev = NULL;
+	spent->next = NULL; /* Unlink first so that the put can't cascade to next. */
+	netfs_put_bvecq(spent);
+	pos->bvecq = next; /* We take spent's ref */
+	pos->slot = 0;
+	pos->offset = 0;
+	return next;
+}
+
+static inline bool bvecq_is_full(const struct bvecq *bvecq)
+{
+	return !(bvecq->nr_segs < bvecq->max_segs);	/* No free slots left. */
+}
+
 /*
  * main.c
  */
@@ -166,6 +252,7 @@ extern atomic_t netfs_n_wh_retry_write_subreq;
 extern atomic_t netfs_n_wb_lock_skip;
 extern atomic_t netfs_n_wb_lock_wait;
 extern atomic_t netfs_n_folioq;
+extern atomic_t netfs_n_bvecq;
 
 int netfs_stats_show(struct seq_file *m, void *v);
 
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index ab6b916addc4..84c2a4bcc762 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -48,6 +48,7 @@ atomic_t netfs_n_wh_retry_write_subreq;
 atomic_t netfs_n_wb_lock_skip;
 atomic_t netfs_n_wb_lock_wait;
 atomic_t netfs_n_folioq;
+atomic_t netfs_n_bvecq;
 
 int netfs_stats_show(struct seq_file *m, void *v)
 {
@@ -90,9 +91,10 @@ int netfs_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&netfs_n_rh_retry_read_subreq),
 		   atomic_read(&netfs_n_wh_retry_write_req),
 		   atomic_read(&netfs_n_wh_retry_write_subreq));
-	seq_printf(m, "Objs   : rr=%u sr=%u foq=%u wsc=%u\n",
+	seq_printf(m, "Objs   : rr=%u sr=%u bq=%u foq=%u wsc=%u\n",
 		   atomic_read(&netfs_n_rh_rreq),
 		   atomic_read(&netfs_n_rh_sreq),
+		   atomic_read(&netfs_n_bvecq),
 		   atomic_read(&netfs_n_folioq),
 		   atomic_read(&netfs_n_wh_wstream_conflict));
 	seq_printf(m, "WbLock : skip=%u wait=%u\n",
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 72ee7d210a74..f360b25ceb31 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -17,12 +17,14 @@
 #include <linux/workqueue.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/bvec.h>
 #include <linux/uio.h>
 #include <linux/rolling_buffer.h>
 
 enum netfs_sreq_ref_trace;
 typedef struct mempool mempool_t;
 struct folio_queue;
+struct bvecq;
 
 /**
  * folio_start_private_2 - Start an fscache write on a folio.  [DEPRECATED]
@@ -40,6 +42,16 @@ static inline void folio_start_private_2(struct folio *folio)
 	folio_set_private_2(folio);
 }
 
+/*
+ * Position in a bio_vec queue.  The position holds a ref on the queue segment
+ * it points to.
+ */
+struct bvecq_pos {
+	struct bvecq		*bvecq;		/* The queue segment the position is in */
+	unsigned int		offset;		/* The offset within the starting slot */
+	u16			slot;		/* The starting slot */
+};
+
 enum netfs_io_source {
 	NETFS_SOURCE_UNKNOWN,
 	NETFS_FILL_WITH_ZEROES,
@@ -462,6 +474,12 @@ int netfs_alloc_folioq_buffer(struct address_space *mapping,
 			      struct folio_queue **_buffer,
 			      size_t *_cur_size, ssize_t size, gfp_t gfp);
 void netfs_free_folioq_buffer(struct folio_queue *fq);
+void dump_bvecq(const struct bvecq *bq);
+struct bvecq *netfs_alloc_bvecq(size_t nr_slots, gfp_t gfp);
+struct bvecq *netfs_alloc_bvecq_buffer(size_t size, unsigned int pre_slots, gfp_t gfp);
+void netfs_free_bvecq_buffer(struct bvecq *bq);
+void netfs_put_bvecq(struct bvecq *bq);
+int netfs_shorten_bvecq_buffer(struct bvecq *bq, unsigned int seg, size_t size);
 
 /**
  * netfs_inode - Get the netfs inode context from the inode
@@ -552,4 +570,10 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
 	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
 }
 
+static inline struct bvecq *netfs_get_bvecq(struct bvecq *bq)
+{
+	refcount_inc(&bq->ref); /* bq is dereferenced, so it must not be NULL. */
+	return bq; /* Returned so that the call can be used in an assignment. */
+}
+
 #endif /* _LINUX_NETFS_H */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 2d366be46a1c..2523adc3ad85 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -778,6 +778,30 @@ TRACE_EVENT(netfs_folioq,
 		      __print_symbolic(__entry->trace, netfs_folioq_traces))
 	    );
 
+TRACE_EVENT(netfs_bv_slot,
+	    TP_PROTO(const struct bvecq *bq, int slot),
+	    /* Records the pfn, offset and length found in slot @slot of @bq. */
+	    TP_ARGS(bq, slot),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long,		pfn)
+		    __field(unsigned int,		offset)
+		    __field(unsigned int,		len)
+		    __field(unsigned int,		slot)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->slot = slot;
+		    __entry->pfn = page_to_pfn(bq->bv[slot].bv_page);
+		    __entry->offset = bq->bv[slot].bv_offset;
+		    __entry->len = bq->bv[slot].bv_len;
+			   ),
+
+	    TP_printk("bq[%x] p=%lx %x-%x",
+		      __entry->slot,
+		      __entry->pfn, __entry->offset, __entry->offset + __entry->len)
+	    );
+
 #undef EM
 #undef E_
 #endif /* _TRACE_NETFS_H */