[RFC PATCH] nvme: add 'queue_if_no_path' semantics

Hannes Reinecke hare at suse.de
Fri Oct 2 12:06:34 EDT 2020


Currently namespaces behave differently depending on the 'CMIC'
setting. If CMIC is zero, the device is removed once the last path
goes away. If CMIC has the multipath bit set, the device is retained
even if the last path is removed.
This is okay for fabrics, where one can do an explicit disconnect
to remove the device, but for nvme-pci this induces a regression
with PCI hotplug.
When the NVMe device is opened (eg by MD), the NVMe device is not
removed after a PCI hot-remove. Hence MD will not be notified about
the event, and will continue to consider this device as operational.
Consequently, upon PCI hot-add the device shows up as a new NVMe
device, and MD will fail to reattach the device.
So this patch adds NVME_NSHEAD_QUEUE_IF_NO_PATH flag to the nshead
to restore the original behaviour for non-fabrics NVMe devices.

Suggested-by: Keith Busch <keith.busch at wdc.com>
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
 drivers/nvme/host/core.c      | 42 ++++++++++++++++++++++++++++++++++-
 drivers/nvme/host/multipath.c |  5 ++++-
 drivers/nvme/host/nvme.h      |  3 ++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 385b10317873..87589f0adea3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -474,7 +474,11 @@ static void nvme_free_ns_head(struct kref *ref)
 	struct nvme_ns_head *head =
 		container_of(ref, struct nvme_ns_head, ref);
 
-	nvme_mpath_remove_disk(head);
+	if (head->disk) {
+		if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags))
+			nvme_mpath_remove_disk(head);
+		put_disk(head->disk);
+	}
 	ida_simple_remove(&head->subsys->ns_ida, head->instance);
 	cleanup_srcu_struct(&head->srcu);
 	nvme_put_subsystem(head->subsys);
@@ -3345,12 +3349,42 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
+static ssize_t queue_if_no_path_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+
+	return sprintf(buf, "%s\n",
+		       test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags) ?
+		       "on" : "off");
+}
+
+static ssize_t queue_if_no_path_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+	int queue_if_no_path, err;
+
+	err = kstrtoint(buf, 10, &queue_if_no_path);
+	if (err)
+		return -EINVAL;
+
+	else if (queue_if_no_path <= 0)
+		clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
+	else
+		set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
+	return count;
+}
+static DEVICE_ATTR(queue_if_no_path, S_IRUGO | S_IWUSR,
+	queue_if_no_path_show, queue_if_no_path_store);
+
 static struct attribute *nvme_ns_id_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
 	&dev_attr_nsid.attr,
+	&dev_attr_queue_if_no_path.attr,
 #ifdef CONFIG_NVME_MULTIPATH
 	&dev_attr_ana_grpid.attr,
 	&dev_attr_ana_state.attr,
@@ -3377,6 +3411,10 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 		if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
 			return 0;
 	}
+	if (a == &dev_attr_queue_if_no_path.attr) {
+		if (dev_to_disk(dev)->fops == &nvme_fops)
+			return 0;
+	}
 #ifdef CONFIG_NVME_MULTIPATH
 	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
 		if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
@@ -3685,6 +3723,8 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->subsys = ctrl->subsys;
 	head->ns_id = nsid;
 	head->ids = *ids;
+	if (ctrl->ops->flags & NVME_F_FABRICS)
+		set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
 	kref_init(&head->ref);
 
 	ret = __nvme_check_ids(ctrl->subsys, head);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 74896be40c17..05c4fbc35bb2 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -682,6 +682,10 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
+	if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags)) {
+		kblockd_schedule_work(&head->requeue_work);
+		return;
+	}
 	if (head->disk->flags & GENHD_FL_UP)
 		del_gendisk(head->disk);
 	blk_set_queue_dying(head->disk->queue);
@@ -697,7 +701,6 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 		 */
 		head->disk->queue = NULL;
 	}
-	put_disk(head->disk);
 }
 
 int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 566776100126..baf4a84918d8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -411,6 +411,7 @@ struct nvme_ns_head {
 	struct mutex		lock;
 	unsigned long		flags;
 #define NVME_NSHEAD_DISK_LIVE	0
+#define NVME_NSHEAD_QUEUE_IF_NO_PATH 1
 	struct nvme_ns __rcu	*current_path[];
 #endif
 };
@@ -670,7 +671,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 	struct nvme_ns_head *head = ns->head;
 
 	if (head->disk && list_empty(&head->list))
-		kblockd_schedule_work(&head->requeue_work);
+		nvme_mpath_remove_disk(head);
 }
 
 static inline void nvme_trace_bio_complete(struct request *req,
-- 
2.26.2




More information about the Linux-nvme mailing list