[PATCH 5/6] nvme-rdma: fix timeout handler

Sagi Grimberg sagi at grimberg.me
Thu Aug 6 16:11:36 EDT 2020


>> @@ -1946,6 +1947,22 @@ static int nvme_rdma_cm_handler(struct 
>> rdma_cm_id *cm_id,
>>       return 0;
>>   }
>> +static void nvme_rdma_complete_timed_out(struct request *rq)
>> +{
>> +    struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
>> +    struct nvme_rdma_queue *queue = req->queue;
>> +    struct nvme_rdma_ctrl *ctrl = queue->ctrl;
>> +
>> +    /* fence other contexts that may complete the command */
>> +    flush_work(&ctrl->err_work);
>> +    nvme_rdma_stop_queue(queue);
>> +    if (blk_mq_request_completed(rq))
>> +        return;
>> +    nvme_req(rq)->flags |= NVME_REQ_CANCELLED;
>> +    nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
>> +    blk_mq_complete_request(rq);
> 
> 
> If keep_alive times out, it is possible that we try to call
> blk_mq_free_request() twice for the same request.
> 
> blk_mq_complete_request
>   nvme_rdma_complete_rq
>    blk_mq_end_request
>     __blk_mq_end_request
>      rq->end_io(rq, error) - nvme_keep_alive_end_io
>       blk_mq_free_request
>        __blk_mq_free_request
>          rq->mq_hctx = NULL;
> .
> .
> .
> return BLK_EH_DONE to blk_mq_rq_timed_out
> 
> And then before returning from blk_mq_check_expired
> back down
> 
> rq->end_io(rq, 0)
>   nvme_keep_alive_end_io
>    blk_mq_free_request
>     atomic_dec(&hctx->nr_active)
> 
> since rq->mq_hctx is now NULL, crash in blk_mq_free_request

But the keep alive request is not a flush request...



More information about the Linux-nvme mailing list