[PATCH] nvme-pci: fix sleeping function called from interrupt context
Maurizio Lombardi
mlombard at redhat.com
Fri Dec 15 06:31:36 PST 2023
nvme_handle_cqe() is executed in interrupt context and calls
nvme_complete_async_event(), but the latter may end up calling blocking
functions (note __cancel_work_timer() in the trace below). Sleeping
functions must not be called from interrupt context:
BUG: sleeping function called from invalid context
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/15
Call Trace:
<IRQ>
__cancel_work_timer+0x31e/0x460
? nvme_change_ctrl_state+0xcf/0x3c0 [nvme_core]
? nvme_change_ctrl_state+0xcf/0x3c0 [nvme_core]
nvme_complete_async_event+0x365/0x480 [nvme_core]
nvme_poll_cq+0x262/0xe50 [nvme]
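For reference, the splat comes from the kernel's might_sleep() debug
check: any function that can block, such as the __cancel_work_timer()
seen above, must not be called while in hardirq context. A minimal,
purely hypothetical handler (not code from this driver) that would
trigger the same warning:

	#include <linux/interrupt.h>
	#include <linux/workqueue.h>

	/* Hypothetical device structure, for illustration only. */
	struct my_dev {
		struct work_struct some_work;
	};

	static irqreturn_t bad_irq_handler(int irq, void *data)
	{
		struct my_dev *md = data;

		/*
		 * cancel_work_sync() may block until the work item has
		 * finished executing, so calling it from hardirq context
		 * trips the "sleeping function called from invalid
		 * context" check.
		 */
		cancel_work_sync(&md->some_work);

		return IRQ_HANDLED;
	}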
Fix the bug by deferring the call to nvme_complete_async_event() to the
nvme_wq workqueue, and add a wait queue to make sure there are no async
events still waiting to be completed before the controller is stopped.
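In other words, the hard IRQ path only captures the CQE's status and
result and queues a work item; the potentially blocking completion then
runs from process context, and the teardown path drains in-flight events
with an atomic counter paired with a wait queue. A simplified sketch of
this defer-and-drain pattern (all names below are illustrative, see the
diff for the real ones; handle_event_blocking() is a stand-in for the
blocking completion):

	#include <linux/workqueue.h>
	#include <linux/wait.h>
	#include <linux/atomic.h>
	#include <linux/slab.h>

	struct my_ctrl {
		atomic_t inflight;		/* queued, not yet completed */
		wait_queue_head_t drain_wq;	/* teardown waits here */
	};

	struct deferred_event {
		struct work_struct work;
		struct my_ctrl *ctrl;
	};

	static void deferred_event_fn(struct work_struct *w)
	{
		struct deferred_event *ev =
			container_of(w, struct deferred_event, work);

		handle_event_blocking(ev->ctrl);  /* may sleep, safe here */
		atomic_dec(&ev->ctrl->inflight);
		wake_up(&ev->ctrl->drain_wq);     /* unblock the drainer */
		kfree(ev);
	}

	/* Teardown side: do not proceed until queued events are done. */
	wait_event(ctrl->drain_wq, !atomic_read(&ctrl->inflight));

Note that the counter is incremented before queue_work() in the IRQ
path, so wait_event() can never observe a transient zero while a queued
event is still pending.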
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
---
drivers/nvme/host/pci.c | 40 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 61af7ff1a9d6..4ac3d3606c4f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -139,6 +139,8 @@ struct nvme_dev {
 	u32 cmbsz;
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
+	wait_queue_head_t async_wq;
+	atomic_t async_completing;
 	u32 last_ps;
 	bool hmb;
 
@@ -161,6 +163,13 @@ struct nvme_dev {
 	unsigned int nr_poll_queues;
 };
 
+struct nvme_compl_async_data {
+	struct work_struct cmpl_work;
+	struct nvme_dev *dev;
+	union nvme_result result;
+	__le16 status;
+};
+
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 {
 	return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
@@ -1008,6 +1017,20 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
 	return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
 }
 
+static void nvme_complete_async_event_work(struct work_struct *work)
+{
+	struct nvme_compl_async_data *data;
+	struct nvme_dev *dev;
+
+	data = container_of(work, struct nvme_compl_async_data, cmpl_work);
+	dev = data->dev;
+
+	nvme_complete_async_event(&dev->ctrl, data->status, &data->result);
+	atomic_dec(&dev->async_completing);
+	wake_up(&dev->async_wq);
+	kfree(data);
+}
+
 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 		struct io_comp_batch *iob, u16 idx)
 {
@@ -1022,8 +1045,18 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 	 * for them but rather special case them here.
 	 */
 	if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) {
-		nvme_complete_async_event(&nvmeq->dev->ctrl,
-				cqe->status, &cqe->result);
+		struct nvme_compl_async_data *data;
+
+		data = kmalloc(sizeof(*data), GFP_ATOMIC);
+		if (!data)
+			return;
+
+		INIT_WORK(&data->cmpl_work, nvme_complete_async_event_work);
+		data->dev = nvmeq->dev;
+		data->status = cqe->status;
+		data->result = cqe->result;
+		atomic_inc(&nvmeq->dev->async_completing);
+		queue_work(nvme_wq, &data->cmpl_work);
 		return;
 	}
 
@@ -2933,6 +2966,8 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
 	mutex_init(&dev->shutdown_lock);
 
+	init_waitqueue_head(&dev->async_wq);
+	atomic_set(&dev->async_completing, 0);
 	dev->nr_write_queues = write_queues;
 	dev->nr_poll_queues = poll_queues;
 	dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
@@ -3135,6 +3170,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	}
 
 	flush_work(&dev->ctrl.reset_work);
+	wait_event(dev->async_wq, !atomic_read(&dev->async_completing));
 	nvme_stop_ctrl(&dev->ctrl);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
--
2.39.3