BUG: scheduling while atomic when nvmet_rdma_queue_response fails in posting a request
Sagi Grimberg
sagi at grimberg.me
Tue Jun 8 17:03:48 PDT 2021
>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
>> index 7d607f435e36..6d2eea322779 100644
>> --- a/drivers/nvme/target/rdma.c
>> +++ b/drivers/nvme/target/rdma.c
>> @@ -16,6 +16,7 @@
>> #include <linux/wait.h>
>> #include <linux/inet.h>
>> #include <asm/unaligned.h>
>> +#include <linux/async.h>
>>
>> #include <rdma/ib_verbs.h>
>> #include <rdma/rdma_cm.h>
>> @@ -712,6 +713,12 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
>> }
>> }
>>
>> +static void nvmet_rdma_async_release_rsp(void *data, async_cookie_t cookie)
>> +{
>> + struct nvmet_rdma_rsp *rsp = data;
>> + nvmet_rdma_release_rsp(rsp);
>> +}
>> +
>> static void nvmet_rdma_queue_response(struct nvmet_req *req)
>> {
>> struct nvmet_rdma_rsp *rsp =
>> @@ -745,7 +752,12 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
>>
>> if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
>> pr_err("sending cmd response failed\n");
>> - nvmet_rdma_release_rsp(rsp);
>> + /*
>> + * We might be in atomic context, hence release
>> + * the rsp in async context in case we need to
>> + * process the wr_wait_list.
>> + */
>> + async_schedule(nvmet_rdma_async_release_rsp, rsp);
>> }
>> }
>
> Just FYI, async_schedule() has conditions where it may execute your
> callback synchronously. Your suggestion is probably fine for testing,
> but it sounds like you require something that can guarantee a non-atomic
> context for nvmet_rdma_release_rsp().
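Right - async_schedule() only defers to a worker thread when it can
allocate an async entry and the async queue is not saturated; otherwise it
calls the function inline, in the caller's context. Paraphrasing the
fallback path in kernel/async.c (simplified, not verbatim):

	/* in async_schedule_node_domain(), simplified */
	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry || atomic_read(&entry_count) > MAX_WORK) {
		kfree(entry);
		/* low on memory / too busy: run synchronously */
		func(data, newcookie);
		return newcookie;
	}

So nvmet_rdma_release_rsp() could still end up running from the soft-irq
completion context.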
OK, so the underlying issue is that we end up submitting I/O from atomic
context. Moving the wr_wait_list processing over to a workqueue, as below,
should be more appropriate...
--
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 7d607f435e36..16f2f5a84ae7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -102,6 +102,7 @@ struct nvmet_rdma_queue {
struct work_struct release_work;
struct list_head rsp_wait_list;
+ struct work_struct wr_wait_work;
struct list_head rsp_wr_wait_list;
spinlock_t rsp_wr_wait_lock;
@@ -517,8 +518,10 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
return ret;
}
-static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
+static void nvmet_rdma_process_wr_wait_list(struct work_struct *w)
{
+ struct nvmet_rdma_queue *queue =
+ container_of(w, struct nvmet_rdma_queue, wr_wait_work);
spin_lock(&queue->rsp_wr_wait_lock);
while (!list_empty(&queue->rsp_wr_wait_list)) {
struct nvmet_rdma_rsp *rsp;
@@ -677,7 +680,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
- nvmet_rdma_process_wr_wait_list(queue);
+ schedule_work(&queue->wr_wait_work);
nvmet_rdma_put_rsp(rsp);
}
@@ -1446,6 +1449,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
* inside a CM callback would trigger a deadlock. (great API design..)
*/
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
+ INIT_WORK(&queue->wr_wait_work, nvmet_rdma_process_wr_wait_list);
queue->dev = ndev;
queue->cm_id = cm_id;
queue->port = port->nport;
--
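For clarity, the difference from the first attempt is that schedule_work()
never executes the handler inline; the handler always runs from a kworker
thread, so the wr_wait_list is drained (and any deferred commands are
executed) in process context. A minimal, self-contained sketch of that
pattern, using hypothetical foo_* names rather than the nvmet code:

	#include <linux/kernel.h>
	#include <linux/workqueue.h>

	struct foo_queue {
		struct work_struct	wr_wait_work;
		/* lists, locks, ... */
	};

	/* runs from a kworker thread: process context, may sleep / submit I/O */
	static void foo_process_wr_wait_list(struct work_struct *w)
	{
		struct foo_queue *q = container_of(w, struct foo_queue, wr_wait_work);

		pr_debug("%s: draining wait list for queue %p\n", __func__, q);
		/* safe to sleep or submit I/O here */
	}

	/* may be called from a (soft-)irq completion handler */
	static void foo_completion(struct foo_queue *q)
	{
		schedule_work(&q->wr_wait_work);	/* queue only, never inline */
	}

	static void foo_queue_init(struct foo_queue *q)
	{
		INIT_WORK(&q->wr_wait_work, foo_process_wr_wait_list);
	}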