[PATCH] nvme-pci: let platform handle subsystem reset fallout

Keith Busch kbusch at meta.com
Mon Jun 24 09:07:56 PDT 2024


From: Keith Busch <kbusch at kernel.org>

Scheduling reset_work after an NVMe subsystem reset is expected to fail,
and it also prevents any error handling the platform may provide from
successfully recovering the link without re-enumeration. Provide a
PCI-specific operation that safely initiates a subsystem reset and,
instead of scheduling reset work, reads back the status register to
trigger a PCIe read error.

Reported-by: Nilay Shroff <nilay at linux.ibm.com>
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 drivers/nvme/host/nvme.h |  3 +++
 drivers/nvme/host/pci.c  | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 68b400f9c42d5..f581cb61a34d2 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -551,6 +551,7 @@ struct nvme_ctrl_ops {
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl);
+	int (*subsystem_reset)(struct nvme_ctrl *ctrl);
 	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
 	void (*stop_ctrl)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
@@ -653,6 +654,8 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
 
 	if (!ctrl->subsystem)
 		return -ENOTTY;
+	if (ctrl->ops->subsystem_reset)
+		return ctrl->ops->subsystem_reset(ctrl);
 	if (!nvme_wait_reset(ctrl))
 		return -EBUSY;
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65ff..4465e06c5583b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1143,6 +1143,37 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
 	spin_unlock(&nvmeq->sq_lock);
 }
 
+/* Write NSSR to reset the subsystem; returns 0, -ENODEV or -EBUSY. */
+static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+	struct nvme_dev *dev = to_nvme_dev(ctrl);
+	int ret = 0;
+
+	/*
+	 * Taking the shutdown_lock ensures the iomap is not being altered by
+	 * reset_work. Holding this lock before the RESETTING state change, if
+	 * successful, also ensures nvme_remove won't be able to proceed to
+	 * iounmap until we're done.
+	 */
+	mutex_lock(&dev->shutdown_lock);
+	/* Check the BAR first so a failure cannot strand us in RESETTING. */
+	if (!dev->bar_mapped_size) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+	writel(0x4E564D65, dev->bar + NVME_REG_NSSR); /* ASCII "NVMe" */
+	/* Leave RESETTING; the read back triggers platform error handling */
+	nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE);
+	readl(dev->bar + NVME_REG_CSTS);
+unlock:
+	mutex_unlock(&dev->shutdown_lock);
+	return ret;
+}
+
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
 	struct nvme_command c = { };
@@ -2859,6 +2890,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.reg_read64		= nvme_pci_reg_read64,
 	.free_ctrl		= nvme_pci_free_ctrl,
 	.submit_async_event	= nvme_pci_submit_async_event,
+	.subsystem_reset	= nvme_pci_subsystem_reset,
 	.get_address		= nvme_pci_get_address,
 	.print_device_info	= nvme_pci_print_device_info,
 	.supports_pci_p2pdma	= nvme_pci_supports_pci_p2pdma,
-- 
2.43.0




More information about the Linux-nvme mailing list