[PATCH v3 12/16] cifs: Fix reading into an ITER_FOLIOQ from the smbdirect code
Tom Talpey
tom at talpey.com
Wed Jun 25 11:24:17 PDT 2025
On 6/25/2025 1:55 PM, David Howells wrote:
> Stefan Metzmacher <metze at samba.org> wrote:
>
>>> read_rfc1002_done:
>>> + /* SMBDirect will read it all or nothing */
>>> + msg->msg_iter.count = 0;
>>
>> I think we should be remove this.
>>
>> And I think this patch should come after the
>> CONFIG_HARDENED_USERCOPY change otherwise a bisect will trigger the problem.
>
> Okay, done. I've attached the revised version here. I've also pushed it to
> my git branch and switched patches 12 & 13 there.
>
> David
> ---
> cifs: Fix reading into an ITER_FOLIOQ from the smbdirect code
>
> When performing a file read from RDMA, smbd_recv() prints an "Invalid msg
> type 4" error and fails the I/O. This is due to the switch-statement there
> not handling the ITER_FOLIOQ handed down from netfslib.
>
> Fix this by collapsing smbd_recv_buf() and smbd_recv_page() into
> smbd_recv() and just using copy_to_iter() instead of memcpy(). This
> future-proofs the function too, in case more ITER_* types are added.
>
> Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading")
> Reported-by: Stefan Metzmacher <metze at samba.org>
> Signed-off-by: David Howells <dhowells at redhat.com>
> cc: Steve French <stfrench at microsoft.com>
> cc: Tom Talpey <tom at talpey.com>
> cc: Paulo Alcantara (Red Hat) <pc at manguebit.com>
> cc: Matthew Wilcox <willy at infradead.org>
> cc: linux-cifs at vger.kernel.org
> cc: netfs at lists.linux.dev
> cc: linux-fsdevel at vger.kernel.org
> ---
> fs/smb/client/smbdirect.c | 112 ++++++----------------------------------------
> 1 file changed, 17 insertions(+), 95 deletions(-)
>
> diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
> index 0a9fd6c399f6..754e94a0e07f 100644
> --- a/fs/smb/client/smbdirect.c
> +++ b/fs/smb/client/smbdirect.c
> @@ -1778,35 +1778,39 @@ struct smbd_connection *smbd_get_connection(
> }
>
> /*
> - * Receive data from receive reassembly queue
> + * Receive data from the transport's receive reassembly queue
> * All the incoming data packets are placed in reassembly queue
> - * buf: the buffer to read data into
> + * iter: the buffer to read data into
> * size: the length of data to read
> * return value: actual data read
> - * Note: this implementation copies the data from reassebmly queue to receive
> + *
> + * Note: this implementation copies the data from reassembly queue to receive
> * buffers used by upper layer. This is not the optimal code path. A better way
> * to do it is to not have upper layer allocate its receive buffers but rather
> * borrow the buffer from reassembly queue, and return it after data is
> * consumed. But this will require more changes to upper layer code, and also
> * need to consider packet boundaries while they still being reassembled.
> */
> -static int smbd_recv_buf(struct smbd_connection *info, char *buf,
> - unsigned int size)
> +int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
> {
> struct smbdirect_socket *sc = &info->socket;
> struct smbd_response *response;
> struct smbdirect_data_transfer *data_transfer;
> + size_t size = iov_iter_count(&msg->msg_iter);
> int to_copy, to_read, data_read, offset;
> u32 data_length, remaining_data_length, data_offset;
> int rc;
>
> + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE))
> + return -EINVAL; /* It's a bug in upper layer to get there */
> +
> again:
> /*
> * No need to hold the reassembly queue lock all the time as we are
> * the only one reading from the front of the queue. The transport
> * may add more entries to the back of the queue at the same time
> */
> - log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
> + log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size,
> info->reassembly_data_length);
> if (info->reassembly_data_length >= size) {
> int queue_length;
> @@ -1844,7 +1848,10 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf,
> if (response->first_segment && size == 4) {
> unsigned int rfc1002_len =
> data_length + remaining_data_length;
> - *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
> + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len);
> + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr),
> + &msg->msg_iter) != sizeof(rfc1002_hdr))
> + return -EFAULT;
Shouldn't there be some kind of validity check on the rfc1002 length
field before this? For example, the high octet of that field is
required to be zero (by SMB) and the 24-bit length is not necessarily
checked yet. The original code just returned the decoded value but
this sticks it in the msg_iter. If that's safe, then ok but it seems
odd.
Tom.
> data_read = 4;
> response->first_segment = false;
> log_read(INFO, "returning rfc1002 length %d\n",
> @@ -1853,10 +1860,9 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf,
> }
>
> to_copy = min_t(int, data_length - offset, to_read);
> - memcpy(
> - buf + data_read,
> - (char *)data_transfer + data_offset + offset,
> - to_copy);
> + if (copy_to_iter((char *)data_transfer + data_offset + offset,
> + to_copy, &msg->msg_iter) != to_copy)
> + return -EFAULT;
>
> /* move on to the next buffer? */
> if (to_copy == data_length - offset) {
> @@ -1921,90 +1927,6 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf,
> goto again;
> }
>
> -/*
> - * Receive a page from receive reassembly queue
> - * page: the page to read data into
> - * to_read: the length of data to read
> - * return value: actual data read
> - */
> -static int smbd_recv_page(struct smbd_connection *info,
> - struct page *page, unsigned int page_offset,
> - unsigned int to_read)
> -{
> - struct smbdirect_socket *sc = &info->socket;
> - int ret;
> - char *to_address;
> - void *page_address;
> -
> - /* make sure we have the page ready for read */
> - ret = wait_event_interruptible(
> - info->wait_reassembly_queue,
> - info->reassembly_data_length >= to_read ||
> - sc->status != SMBDIRECT_SOCKET_CONNECTED);
> - if (ret)
> - return ret;
> -
> - /* now we can read from reassembly queue and not sleep */
> - page_address = kmap_atomic(page);
> - to_address = (char *) page_address + page_offset;
> -
> - log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
> - page, to_address, to_read);
> -
> - ret = smbd_recv_buf(info, to_address, to_read);
> - kunmap_atomic(page_address);
> -
> - return ret;
> -}
> -
> -/*
> - * Receive data from transport
> - * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
> - * return: total bytes read, or 0. SMB Direct will not do partial read.
> - */
> -int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
> -{
> - char *buf;
> - struct page *page;
> - unsigned int to_read, page_offset;
> - int rc;
> -
> - if (iov_iter_rw(&msg->msg_iter) == WRITE) {
> - /* It's a bug in upper layer to get there */
> - cifs_dbg(VFS, "Invalid msg iter dir %u\n",
> - iov_iter_rw(&msg->msg_iter));
> - rc = -EINVAL;
> - goto out;
> - }
> -
> - switch (iov_iter_type(&msg->msg_iter)) {
> - case ITER_KVEC:
> - buf = msg->msg_iter.kvec->iov_base;
> - to_read = msg->msg_iter.kvec->iov_len;
> - rc = smbd_recv_buf(info, buf, to_read);
> - break;
> -
> - case ITER_BVEC:
> - page = msg->msg_iter.bvec->bv_page;
> - page_offset = msg->msg_iter.bvec->bv_offset;
> - to_read = msg->msg_iter.bvec->bv_len;
> - rc = smbd_recv_page(info, page, page_offset, to_read);
> - break;
> -
> - default:
> - /* It's a bug in upper layer to get there */
> - cifs_dbg(VFS, "Invalid msg type %d\n",
> - iov_iter_type(&msg->msg_iter));
> - rc = -EINVAL;
> - }
> -
> -out:
> - /* SMBDirect will read it all or nothing */
> - if (rc > 0)
> - msg->msg_iter.count = 0;
> - return rc;
> -}
> -
> /*
> * Send data to transport
> * Each rqst is transported as a SMBDirect payload
>
>
More information about the linux-afs
mailing list