[PATCH 2/2] nvmet: fix a race condition between release_queue and io_work

John Meneghini jmeneghi at redhat.com
Thu Oct 21 07:57:32 PDT 2021


Reviewed-by: John Meneghini <jmeneghi at redhat.com>

On 10/21/21 4:41 AM, Maurizio Lombardi wrote:
> If the initiator executes a reset controller operation while
> performing I/O, the target kernel will crash because of a race condition
> between release_queue and io_work:
> nvmet_tcp_uninit_data_in_cmds() may be executed while io_work
> is running. Calling flush_work(io_work) was not sufficient to
> prevent this, because io_work could requeue itself.
> 
> * Fix this bug by preventing io_work from being enqueued when
> sk_user_data is NULL (which means that the queue is about to be deleted).
> 
> * Ensure that all the memory allocated for the commands' iovec is freed.
> 
> Signed-off-by: Maurizio Lombardi <mlombard at redhat.com>
> ---
>   drivers/nvme/target/tcp.c | 13 +++++++++----
>   1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index 2f03a94725ae..1eedbd83c95f 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -551,6 +551,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
>   	struct nvmet_tcp_cmd *cmd =
>   		container_of(req, struct nvmet_tcp_cmd, req);
>   	struct nvmet_tcp_queue	*queue = cmd->queue;
> +	struct socket *sock = queue->sock;
>   	struct nvme_sgl_desc *sgl;
>   	u32 len;
>   
> @@ -570,7 +571,10 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
>   	}
>   
>   	llist_add(&cmd->lentry, &queue->resp_list);
> -	queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
> +	read_lock_bh(&sock->sk->sk_callback_lock);
> +	if (likely(sock->sk->sk_user_data))
> +		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
> +	read_unlock_bh(&sock->sk->sk_callback_lock);
>   }
>   
>   static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
> @@ -1427,7 +1431,9 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
>   
>   	for (i = 0; i < queue->nr_cmds; i++, cmd++) {
>   		if (nvmet_tcp_need_data_in(cmd))
> -			nvmet_tcp_finish_cmd(cmd);
> +			nvmet_req_uninit(&cmd->req);
> +		nvmet_tcp_unmap_pdu_iovec(cmd);
> +		nvmet_tcp_free_iovec(cmd);
>   	}
>   
>   	if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
> @@ -1447,11 +1453,10 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
>   	mutex_unlock(&nvmet_tcp_queue_mutex);
>   
>   	nvmet_tcp_restore_socket_callbacks(queue);
> -	flush_work(&queue->io_work);
> +	cancel_work_sync(&queue->io_work);
>   
>   	nvmet_tcp_uninit_data_in_cmds(queue);
>   	nvmet_sq_destroy(&queue->nvme_sq);
> -	cancel_work_sync(&queue->io_work);
>   	sock_release(queue->sock);
>   	nvmet_tcp_free_cmds(queue);
>   	if (queue->hdr_digest || queue->data_digest)
> 




More information about the Linux-nvme mailing list