[EXT] Re: BUG: scheduling while atomic when nvmet_rdma_queue_response fails in posting a request
Michal Kalderon
mkalderon at marvell.com
Mon Jun 14 07:44:51 PDT 2021
> From: Sagi Grimberg <sagi at grimberg.me>
> Sent: Wednesday, June 9, 2021 3:04 AM
>
> >> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> >> index 7d607f435e36..6d2eea322779 100644
> >> --- a/drivers/nvme/target/rdma.c
> >> +++ b/drivers/nvme/target/rdma.c
> >> @@ -16,6 +16,7 @@
> >> #include <linux/wait.h>
> >> #include <linux/inet.h>
> >> #include <asm/unaligned.h>
> >> +#include <linux/async.h>
> >>
> >> #include <rdma/ib_verbs.h>
> >> #include <rdma/rdma_cm.h>
> >> @@ -712,6 +713,12 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
> >> }
> >> }
> >>
> >> +static void nvmet_rdma_async_release_rsp(void *data, async_cookie_t cookie)
> >> +{
> >> + struct nvmet_rdma_rsp *rsp = data;
> >> + nvmet_rdma_release_rsp(rsp);
> >> +}
> >> +
> >> static void nvmet_rdma_queue_response(struct nvmet_req *req)
> >> {
> >> struct nvmet_rdma_rsp *rsp =
> >> @@ -745,7 +752,12 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
> >>
> >> if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
> >> pr_err("sending cmd response failed\n");
> >> - nvmet_rdma_release_rsp(rsp);
> >> + /*
> >> + * We might be in atomic context, hence release
> >> + * the rsp in async context in case we need to
> >> + * process the wr_wait_list.
> >> + */
> >> + async_schedule(nvmet_rdma_async_release_rsp, rsp);
> >> }
> >> }
> >
> > Just FYI, async_schedule() has conditions where it may execute your
> > callback synchronously. Your suggestion is probably fine for testing,
> > but it sounds like you require something that can guarantee a non-atomic
> > context for nvmet_rdma_release_rsp().
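Side note: async_schedule() (kernel/async.c) falls back to invoking the
callback synchronously, in the caller's context, when it cannot queue an
async entry -- e.g. when the GFP_ATOMIC allocation fails or too much async
work is already pending -- so it gives no process-context guarantee.
Roughly (a simplified sketch, not the verbatim kernel code; the sketch_*
names are hypothetical stand-ins):

#include <linux/async.h>	/* async_func_t, async_cookie_t */
#include <linux/slab.h>	/* kzalloc(), kfree() */

struct sketch_entry {
	async_func_t func;
	void *data;
};

/* Hypothetical stand-ins for the real queueing internals. */
static bool sketch_too_much_pending_work(void);
static async_cookie_t sketch_queue_to_worker_pool(struct sketch_entry *entry);

static async_cookie_t sketch_async_schedule(async_func_t func, void *data)
{
	struct sketch_entry *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

	if (!entry || sketch_too_much_pending_work()) {
		kfree(entry);
		/* Fallback: run the callback right here, in the
		 * caller's (possibly atomic) context. */
		func(data, 0);
		return 0;
	}
	entry->func = func;
	entry->data = data;
	/* Normal path: the entry runs later, in process context,
	 * on one of the async worker threads. */
	return sketch_queue_to_worker_pool(entry);
}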
>
> OK, it seems that the issue is that we are submitting I/O in atomic
> context. This should be more appropriate...
Thanks Sagi, this seems to work. I'm still hitting some other issues where reconnect sometimes fails, but I'm collecting more info.
>
> --
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 7d607f435e36..16f2f5a84ae7 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -102,6 +102,7 @@ struct nvmet_rdma_queue {
>
> struct work_struct release_work;
> struct list_head rsp_wait_list;
> + struct work_struct wr_wait_work;
> struct list_head rsp_wr_wait_list;
> spinlock_t rsp_wr_wait_lock;
>
> @@ -517,8 +518,10 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
> return ret;
> }
>
> -static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
> +static void nvmet_rdma_process_wr_wait_list(struct work_struct *w)
> {
> + struct nvmet_rdma_queue *queue =
> + container_of(w, struct nvmet_rdma_queue, wr_wait_work);
> spin_lock(&queue->rsp_wr_wait_lock);
> while (!list_empty(&queue->rsp_wr_wait_list)) {
> struct nvmet_rdma_rsp *rsp;
> @@ -677,7 +680,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
> nvmet_req_free_sgls(&rsp->req);
>
> if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
> - nvmet_rdma_process_wr_wait_list(queue);
> + schedule_work(&queue->wr_wait_work);
>
> nvmet_rdma_put_rsp(rsp);
> }
> @@ -1446,6 +1449,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
> * inside a CM callback would trigger a deadlock. (great API design..)
> */
> INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
> + INIT_WORK(&queue->wr_wait_work, nvmet_rdma_process_wr_wait_list);
> queue->dev = ndev;
> queue->cm_id = cm_id;
> queue->port = port->nport;
> --
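For anyone following along: the key property of this fix is that
schedule_work() only queues the work item (so it is safe to call from
atomic context, e.g. a send completion handler), while the handler itself
always runs in process context on the system workqueue, where sleeping and
I/O submission are allowed. A distilled, self-contained sketch of the same
deferral pattern (the demo_* names are illustrative, not from the patch):

#include <linux/kernel.h>	/* container_of() */
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/list.h>

/* Illustrative container mirroring the fields the patch adds and uses. */
struct demo_queue {
	struct work_struct wr_wait_work;
	struct list_head rsp_wr_wait_list;
	spinlock_t rsp_wr_wait_lock;
};

/* Work handler: always entered in process context, so blocking
 * (e.g. submitting I/O) is safe while draining the wait list. */
static void demo_process_wr_wait_list(struct work_struct *w)
{
	struct demo_queue *queue =
		container_of(w, struct demo_queue, wr_wait_work);

	spin_lock(&queue->rsp_wr_wait_lock);
	/* Drain queue->rsp_wr_wait_list here, as the patch does. */
	spin_unlock(&queue->rsp_wr_wait_lock);
}

static void demo_queue_init(struct demo_queue *queue)
{
	INIT_WORK(&queue->wr_wait_work, demo_process_wr_wait_list);
	INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
	spin_lock_init(&queue->rsp_wr_wait_lock);
}

/* Safe from atomic context: schedule_work() never runs the
 * handler inline, it only queues it. */
static void demo_kick(struct demo_queue *queue)
{
	schedule_work(&queue->wr_wait_work);
}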
Thanks,
Tested-by: Michal Kalderon <michal.kalderon at marvell.com>