[PATCH] nvme-pci: fix sleeping function called from interrupt context

Maurizio Lombardi mlombard at redhat.com
Fri Dec 15 06:31:36 PST 2023


The nvme_handle_cqe() interrupt handler calls nvme_complete_async_event(),
but the latter may end up calling functions that can sleep, and sleeping
functions must not be called from interrupt context.

BUG: sleeping function called from invalid context
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/15
 Call Trace:
 <IRQ>
  __cancel_work_timer+0x31e/0x460
  ? nvme_change_ctrl_state+0xcf/0x3c0 [nvme_core]
  ? nvme_change_ctrl_state+0xcf/0x3c0 [nvme_core]
  nvme_complete_async_event+0x365/0x480 [nvme_core]
  nvme_poll_cq+0x262/0xe50 [nvme]

Fix the bug by deferring the call to nvme_complete_async_event() to the
nvme_wq workqueue, and add a wait queue to make sure there are no async
events still waiting to be completed before the controller is stopped.

Signed-off-by: Maurizio Lombardi <mlombard at redhat.com>
---
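Note (below the "---", so not part of the commit message): the splat above is
the usual CONFIG_DEBUG_ATOMIC_SLEEP class of bug. nvme_handle_cqe() runs in
hard-IRQ (or polled) context, so nothing reached from it may sleep, while
__cancel_work_timer() in the trace is the backend of
cancel_work_sync()/cancel_delayed_work_sync() and may block waiting for a
work item to finish. A minimal illustrative sketch of that class of bug
(hypothetical handler, not NVMe code):

#include <linux/interrupt.h>
#include <linux/workqueue.h>

/* Hypothetical example: calling a function that may sleep from a
 * hard-IRQ handler triggers the "sleeping function called from invalid
 * context" splat when CONFIG_DEBUG_ATOMIC_SLEEP is enabled.
 */
static irqreturn_t example_irq_handler(int irq, void *cookie)
{
	struct work_struct *work = cookie;

	/* cancel_work_sync() may sleep while waiting for a queued or
	 * running work item to finish, which is not allowed here.
	 */
	cancel_work_sync(work);

	return IRQ_HANDLED;
}

The patch below avoids this by doing the AEN completion from process
context (nvme_wq) and making nvme_remove() wait for any deferred
completions before stopping the controller.
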
 drivers/nvme/host/pci.c | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 61af7ff1a9d6..4ac3d3606c4f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -139,6 +139,8 @@ struct nvme_dev {
 	u32 cmbsz;
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
+	wait_queue_head_t async_wq;
+	atomic_t async_completing;
 	u32 last_ps;
 	bool hmb;
 
@@ -161,6 +163,13 @@ struct nvme_dev {
 	unsigned int nr_poll_queues;
 };
 
+struct nvme_compl_async_data {
+	struct work_struct cmpl_work;
+	struct nvme_dev *dev;
+	union nvme_result result;
+	__le16 status;
+};
+
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 {
 	return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
@@ -1008,6 +1017,20 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
 	return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
 }
 
+static void nvme_complete_async_event_work(struct work_struct *work)
+{
+	struct nvme_compl_async_data *data;
+	struct nvme_dev *dev;
+
+	data = container_of(work, struct nvme_compl_async_data, cmpl_work);
+	dev = data->dev;
+
+	nvme_complete_async_event(&dev->ctrl, data->status, &data->result);
+	atomic_dec(&dev->async_completing);
+	wake_up(&dev->async_wq);
+	kfree(data);
+}
+
 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 				   struct io_comp_batch *iob, u16 idx)
 {
@@ -1022,8 +1045,18 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 	 * for them but rather special case them here.
 	 */
 	if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) {
-		nvme_complete_async_event(&nvmeq->dev->ctrl,
-				cqe->status, &cqe->result);
+		struct nvme_compl_async_data *data;
+
+		data = kmalloc(sizeof(*data), GFP_ATOMIC);
+		if (!data)
+			return;
+
+		INIT_WORK(&data->cmpl_work, nvme_complete_async_event_work);
+		data->dev = nvmeq->dev;
+		data->status = cqe->status;
+		data->result = cqe->result;
+		atomic_inc(&nvmeq->dev->async_completing);
+		queue_work(nvme_wq, &data->cmpl_work);
 		return;
 	}
 
@@ -2933,6 +2966,8 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
 	mutex_init(&dev->shutdown_lock);
 
+	init_waitqueue_head(&dev->async_wq);
+	atomic_set(&dev->async_completing, 0);
 	dev->nr_write_queues = write_queues;
 	dev->nr_poll_queues = poll_queues;
 	dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
@@ -3135,6 +3170,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	}
 
 	flush_work(&dev->ctrl.reset_work);
+	wait_event(dev->async_wq, !atomic_read(&dev->async_completing));
 	nvme_stop_ctrl(&dev->ctrl);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
-- 
2.39.3