[PATCH net 2/3] rxrpc: Fix DATA decrypt vs splice() by copying data to buffer in recvmsg

David Laight david.laight.linux at gmail.com
Tue May 12 06:38:35 PDT 2026


On Mon, 11 May 2026 17:07:48 +0100
David Howells <dhowells at redhat.com> wrote:

> This improves the fix for CVE-2026-43500.
> 
> Fix the pagecache corruption from in-place decryption of a DATA packet
> transmitted locally by splice() by getting rid of the packet sharing in the
> I/O thread and unconditionally extracting the packet content into a bounce
> buffer in which the buffer is decrypted.  recvmsg() (or the kernel
> equivalent) then copies the data from the bounce buffer to the destination
> buffer.  The sk_buff then remains unmodified.
> 
> This has an additional advantage in that the packet is then arranged in the
> buffer with the correct alignment required for the crypto algorithms to
> process directly.  The performance of the crypto does seem to be a little
> faster and, surprisingly, the unencrypted performance doesn't seem to
> change much - possibly due to removing complexity from the I/O thread.

Yep, avoiding data copies is overrated :-)

> Yet another advantage is that the I/O thread doesn't have to copy packets
> which would slow down packet distribution, ACK generation, etc..
> 
> The buffer belongs to the call and is allocated initially at 2K,
> sufficiently large to hold a whole jumbo subpacket, but the buffer will be
> increased in size if needed.  There is one downside here, and that's if a
> MSG_PEEK of more than one byte occurs, it may move on to the next packet,
> replacing the content of the buffer.  In such a case, it has to go back and
> re-decrypt the current packet.
> 
> Note that rx_pkt_offset may legitimately see 0 as a valid offset now, so
> switch to using USHRT_MAX to indicate an invalid offset.
> 
> Note also that I would generally prefer to replace the buffers of the
> current sk_buff with a new kmalloc'd buffer of the right size, ditching the
> old data and frags as this makes the handling of MSG_PEEK easier and
> removes the double-decryption issue, but this looks like quite a
> complicated thing to achieve.  skb_morph() looks half way to what I want,
> but I don't want to have to allocate a new sk_buff.

Wouldn't you need to do that anyway when the kkb is shared - or can't
that happen?

> 
> Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()")
> Reported-by: Hyunwoo Kim <imv4bel at gmail.com>
> Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/
> Signed-off-by: David Howells <dhowells at redhat.com>
> cc: Marc Dionne <marc.dionne at auristor.com>
> cc: Jeffrey Altman <jaltman at auristor.com>
> cc: "David S. Miller" <davem at davemloft.net>
> cc: Eric Dumazet <edumazet at google.com>
> cc: Jakub Kicinski <kuba at kernel.org>
> cc: Paolo Abeni <pabeni at redhat.com>
> cc: Simon Horman <horms at kernel.org>
> cc: Jiayuan Chen <jiayuan.chen at linux.dev>
> cc: netdev at vger.kernel.org
> cc: linux-afs at lists.infradead.org
> cc: stable at vger.kernel.org
> ---
>  net/rxrpc/ar-internal.h |  7 +++-
>  net/rxrpc/call_event.c  | 22 +----------
>  net/rxrpc/call_object.c |  2 +
>  net/rxrpc/insecure.c    |  3 --
>  net/rxrpc/recvmsg.c     | 72 +++++++++++++++++++++++++++-------
>  net/rxrpc/rxgk.c        | 49 +++++++++++------------
>  net/rxrpc/rxgk_common.h | 79 +++++++++++++++++++++++++++++++++++++
>  net/rxrpc/rxkad.c       | 86 +++++++++++++++--------------------------
>  8 files changed, 200 insertions(+), 120 deletions(-)
> 
> diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
> index 27c2aa2dd023..783367eea798 100644
> --- a/net/rxrpc/ar-internal.h
> +++ b/net/rxrpc/ar-internal.h
> @@ -213,8 +213,6 @@ struct rxrpc_skb_priv {
>  		struct {
>  			u16		offset;		/* Offset of data */
>  			u16		len;		/* Length of data */
> -			u8		flags;
> -#define RXRPC_RX_VERIFIED	0x01
>  		};
>  		struct {
>  			rxrpc_seq_t	first_ack;	/* First packet in acks table */
> @@ -774,6 +772,11 @@ struct rxrpc_call {
>  	struct sk_buff_head	recvmsg_queue;	/* Queue of packets ready for recvmsg() */
>  	struct sk_buff_head	rx_queue;	/* Queue of packets for this call to receive */
>  	struct sk_buff_head	rx_oos_queue;	/* Queue of out of sequence packets */
> +	void			*rx_dec_buffer;	/* Decryption buffer */
> +	unsigned short		rx_dec_bsize;	/* rx_dec_buffer size */
> +	unsigned short		rx_dec_offset;	/* Decrypted packet data offset */
> +	unsigned short		rx_dec_len;	/* Decrypted packet data len */

Is it actually worth making those short rather than int?
I doubt the extra 4 bytes will matter and the generated code might be better.
(IIRC 32bit arm has a limited offset from 16 bit load/store, dunno about 64bit)

> +	rxrpc_seq_t		rx_dec_seq;	/* Packet in decryption buffer */
>  
>  	rxrpc_seq_t		rx_highest_seq;	/* Higest sequence number received */
>  	rxrpc_seq_t		rx_consumed;	/* Highest packet consumed */
> diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
> index 2b19b252225e..fec59d9338b9 100644
> --- a/net/rxrpc/call_event.c
> +++ b/net/rxrpc/call_event.c
> @@ -332,27 +332,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
>  
>  			saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK;
>  
> -			if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
> -			    sp->hdr.securityIndex != 0 &&
> -			    (skb_cloned(skb) ||
> -			     skb_has_frag_list(skb) ||
> -			     skb_has_shared_frag(skb))) {
> -				/* Unshare the packet so that it can be
> -				 * modified by in-place decryption.
> -				 */
> -				struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
> -
> -				if (nskb) {
> -					rxrpc_new_skb(nskb, rxrpc_skb_new_unshared);
> -					rxrpc_input_call_packet(call, nskb);
> -					rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx);
> -				} else {
> -					/* OOM - Drop the packet. */
> -					rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem);
> -				}
> -			} else {
> -				rxrpc_input_call_packet(call, skb);
> -			}
> +			rxrpc_input_call_packet(call, skb);
>  			rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
>  			did_receive = true;
>  		}
> diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
> index f035f486c139..fcb9d38bb521 100644
> --- a/net/rxrpc/call_object.c
> +++ b/net/rxrpc/call_object.c
> @@ -152,6 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
>  	spin_lock_init(&call->notify_lock);
>  	refcount_set(&call->ref, 1);
>  	call->debug_id		= debug_id;
> +	call->rx_pkt_offset	= USHRT_MAX;
>  	call->tx_total_len	= -1;
>  	call->tx_jumbo_max	= 1;
>  	call->next_rx_timo	= 20 * HZ;
> @@ -553,6 +554,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
>  	rxrpc_purge_queue(&call->recvmsg_queue);
>  	rxrpc_purge_queue(&call->rx_queue);
>  	rxrpc_purge_queue(&call->rx_oos_queue);
> +	kfree(call->rx_dec_buffer);
>  }
>  
>  /*
> diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
> index 0a260df45d25..7a26c6097d03 100644
> --- a/net/rxrpc/insecure.c
> +++ b/net/rxrpc/insecure.c
> @@ -32,9 +32,6 @@ static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
>  
>  static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
>  {
> -	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
> -
> -	sp->flags |= RXRPC_RX_VERIFIED;
>  	return 0;
>  }
>  
> diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
> index e1f7513a46db..865e368381d5 100644
> --- a/net/rxrpc/recvmsg.c
> +++ b/net/rxrpc/recvmsg.c
> @@ -147,15 +147,55 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
>  }
>  
>  /*
> - * Decrypt and verify a DATA packet.
> + * Decrypt and verify a DATA packet.  The content of the packet is pulled out
> + * into a flat buffer rather than decrypting in place in the skbuff.  This also
> + * has the advantage of aligning the buffer correctly for the crypto routines.
> + *
> + * We keep track of the sequence number of the packet currently decrypted into
> + * the buffer in ->rx_dec_seq.  Unfortunately, this means that a MSG_PEEK of
> + * more than one byte may cause a later packet to be decrypted into the buffer,
> + * requiring the original to be re-decrypted when recvmsg() is called again.
>   */
>  static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
>  {
>  	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
> +	int ret;
>  
> -	if (sp->flags & RXRPC_RX_VERIFIED)
> +	if (call->rx_dec_seq == sp->hdr.seq && call->rx_dec_buffer)
>  		return 0;
> -	return call->security->verify_packet(call, skb);
> +
> +	if (call->rx_dec_bsize < sp->len) {

IMHO That test is backwards; the 'more constant' value should be on the right.

> +		/* Make sure we can hold a 1412-byte jumbo subpacket and make
> +		 * sure that the buffer size is aligned to a crypto blocksize.
> +		 */
> +		size_t size = umin(round_up(sp->len, 32), 2048);

Doesn't min() work?

> +		void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS);
> +
> +		if (!buffer)
> +			return -ENOMEM;
> +		call->rx_dec_buffer = buffer;
> +		call->rx_dec_bsize = size;
> +	}

That doesn't look right.
If sp->len is bigger than 2048 the you keep allocating a new buffer
and the call below overruns the allocated buffer.

> +
> +	ret = -EFAULT;
> +	if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0)
> +		goto err;
> +
...

-- David



More information about the linux-afs mailing list