nvme/pcie hot plug results in /dev name change
Christoph Hellwig
hch at infradead.org
Tue Jan 31 22:27:24 PST 2023
On Wed, Feb 01, 2023 at 10:33:15AM +0800, Ming Lei wrote:
> > The native nvme multipath looks like it could be leveraged to improving that
> > user experience if we wanted to make that layer an option for non-multipath
> > devices.
>
> Can you share the basic idea? Will nvme mulitpath hold the io error and
> not propagate to upper layer until new device is probed? What if the
> new device is probed late, and IO has been timed out and err is returned
> to upper layer?
Attached is what I did, but I'm really not sure it is the right final
approach.
-------------- next part --------------
>From 7016fc46f29463871b68348088384b3d0ceeba91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch at lst.de>
Date: Sun, 30 Oct 2022 18:17:28 +0100
Subject: nvme: split ANA attribute handling from nvme_ns_id_attr_group
Use a separate attribute group for the ANA attributes that aren't shown
on the multipath-device instead of just hiding them at runtime.
While this is a little more code, it keeps the separation clear.
Signed-off-by: Christoph Hellwig <hch at lst.de>
---
drivers/nvme/host/core.c | 22 +++++++-------------
drivers/nvme/host/multipath.c | 38 ++++++++++++++++++++++++++++++++---
drivers/nvme/host/nvme.h | 14 ++-----------
3 files changed, 44 insertions(+), 30 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 14568ae308fba5..2eab4d1b521edb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3432,10 +3432,6 @@ static struct attribute *nvme_ns_id_attrs[] = {
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
-#ifdef CONFIG_NVME_MULTIPATH
- &dev_attr_ana_grpid.attr,
- &dev_attr_ana_state.attr,
-#endif
NULL,
};
@@ -3458,24 +3454,20 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0;
}
-#ifdef CONFIG_NVME_MULTIPATH
- if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
- if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
- return 0;
- if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
- return 0;
- }
-#endif
+
return a->mode;
}
-static const struct attribute_group nvme_ns_id_attr_group = {
+const struct attribute_group nvme_ns_id_attr_group = {
.attrs = nvme_ns_id_attrs,
.is_visible = nvme_ns_id_attrs_are_visible,
};
-const struct attribute_group *nvme_ns_id_attr_groups[] = {
+static const struct attribute_group *nvme_disk_attr_groups[] = {
&nvme_ns_id_attr_group,
+#ifdef CONFIG_NVME_MULTIPATH
+ &nvme_ns_ana_attr_group,
+#endif
NULL,
};
@@ -4250,7 +4242,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
up_write(&ctrl->namespaces_rwsem);
nvme_get_ctrl(ctrl);
- if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
+ if (device_add_disk(ctrl->device, ns->disk, nvme_disk_attr_groups))
goto out_cleanup_ns_from_list;
if (!nvme_ns_head_multipath(ns->head))
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0ea7e441e080f2..96ea23f1e374f0 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -50,6 +50,11 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
subsys->iopolicy = iopolicy;
}
+static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+ return ctrl->ana_log_buf != NULL;
+}
+
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
struct nvme_ns_head *h;
@@ -443,6 +448,11 @@ static const struct file_operations nvme_ns_head_chr_fops = {
.uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll,
};
+static const struct attribute_group *nvme_mpath_disk_attr_groups[] = {
+ &nvme_ns_id_attr_group,
+ NULL,
+};
+
static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
{
int ret;
@@ -539,7 +549,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
*/
if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
rc = device_add_disk(&head->subsys->dev, head->disk,
- nvme_ns_id_attr_groups);
+ nvme_mpath_disk_attr_groups);
if (rc) {
clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
return;
@@ -780,7 +790,7 @@ static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
{
return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
}
-DEVICE_ATTR_RO(ana_grpid);
+static DEVICE_ATTR_RO(ana_grpid);
static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -789,7 +799,29 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
}
-DEVICE_ATTR_RO(ana_state);
+static DEVICE_ATTR_RO(ana_state);
+
+static struct attribute *nvme_ns_ana_attrs[] = {
+ &dev_attr_ana_grpid.attr,
+ &dev_attr_ana_state.attr,
+ NULL,
+};
+
+static umode_t nvme_ns_ana_attr_is_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+
+ if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
+ return 0;
+
+ return a->mode;
+}
+
+const struct attribute_group nvme_ns_ana_attr_group = {
+ .attrs = nvme_ns_ana_attrs,
+ .is_visible = nvme_ns_ana_attr_is_visible,
+};
static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 00d3b438efe3f3..c656643ba0eead 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -855,17 +855,13 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
-extern const struct attribute_group *nvme_ns_id_attr_groups[];
+extern const struct attribute_group nvme_ns_id_attr_group;
+extern const struct attribute_group nvme_ns_ana_attr_group;
extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
#ifdef CONFIG_NVME_MULTIPATH
-static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
-{
- return ctrl->ana_log_buf != NULL;
-}
-
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
@@ -894,16 +890,10 @@ static inline void nvme_trace_bio_complete(struct request *req)
}
extern bool multipath;
-extern struct device_attribute dev_attr_ana_grpid;
-extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
#else
#define multipath false
-static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
-{
- return false;
-}
static inline void nvme_failover_req(struct request *req)
{
}
--
2.39.0
-------------- next part --------------
>From 76e56c0c5a1e6bb3d9d4d11255a5bc4db634e28b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch at lst.de>
Date: Sun, 30 Oct 2022 18:39:53 +0100
Subject: nvme: allow pinning shared namespaces
Add a sysfs attribute to pin a ns_head. While pinned the ns_head
will not go away even if no namespace currently exists.
Signed-off-by: Christoph Hellwig <hch at lst.de>
---
drivers/nvme/host/core.c | 11 ++++--
drivers/nvme/host/multipath.c | 63 +++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 1 +
3 files changed, 72 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2eab4d1b521edb..32f2e394dd6dc9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3897,8 +3897,12 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl,
*/
if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h))
continue;
- if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
- return h;
+ if (list_empty(&h->list) &&
+ !test_bit(NVME_NSHEAD_PINNED, &h->flags))
+ continue;
+ if (!nvme_tryget_ns_head(h))
+ continue;
+ return h;
}
return NULL;
@@ -4294,7 +4298,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
- if (list_empty(&ns->head->list)) {
+ if (list_empty(&ns->head->list) &&
+ !test_bit(NVME_NSHEAD_PINNED, &ns->head->flags)) {
list_del_init(&ns->head->entry);
last_path = true;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 96ea23f1e374f0..9dbbd3a4398598 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -448,8 +448,71 @@ static const struct file_operations nvme_ns_head_chr_fops = {
.uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll,
};
+static ssize_t pin_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct nvme_ns_head *head = disk->private_data;
+
+ return sysfs_emit(buf, "%d\n",
+ test_bit(NVME_NSHEAD_PINNED, &head->flags));
+}
+
+static ssize_t pin_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct nvme_ns_head *head = disk->private_data;
+ bool shutdown = false;
+ ssize_t ret = 0;
+ bool pin;
+
+ ret = kstrtobool(buf, &pin);
+ if (ret)
+ return ret;
+
+ if (pin) {
+ if (!nvme_tryget_ns_head(head))
+ return -EINVAL;
+ if (test_and_set_bit(NVME_NSHEAD_PINNED, &head->flags)) {
+ ret = -EINVAL;
+ goto out_put_head;
+ }
+ return count;
+ }
+
+ if (!test_and_clear_bit(NVME_NSHEAD_PINNED, &head->flags))
+ return -EINVAL;
+
+ mutex_lock(&head->subsys->lock);
+ if (list_empty(&head->list)) {
+ list_del_init(&head->entry);
+ shutdown = true;
+ }
+ mutex_unlock(&head->subsys->lock);
+ if (shutdown)
+ nvme_mpath_shutdown_disk(head);
+
+out_put_head:
+ nvme_put_ns_head(head);
+ if (ret)
+ return ret;
+ return count;
+}
+static DEVICE_ATTR_RW(pin);
+
+static struct attribute *nvme_mpath_disk_attrs[] = {
+ &dev_attr_pin.attr,
+ NULL,
+};
+
+const struct attribute_group nvme_mpath_disk_attr_group = {
+ .attrs = nvme_mpath_disk_attrs,
+};
+
static const struct attribute_group *nvme_mpath_disk_attr_groups[] = {
&nvme_ns_id_attr_group,
+ &nvme_mpath_disk_attr_group,
NULL,
};
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index c656643ba0eead..1e0ec8ef4d842a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -442,6 +442,7 @@ struct nvme_ns_head {
struct mutex lock;
unsigned long flags;
#define NVME_NSHEAD_DISK_LIVE 0
+#define NVME_NSHEAD_PINNED 1
struct nvme_ns __rcu *current_path[];
#endif
};
--
2.39.0
-------------- next part --------------
>From 9ceb1ab63aeab757dab5f78f62a2b467dc34e103 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch at lst.de>
Date: Mon, 31 Oct 2022 10:34:44 +0100
Subject: HACK: allow running multipath code for non-shared namespaces
---
drivers/nvme/host/core.c | 4 ++--
drivers/nvme/host/multipath.c | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 32f2e394dd6dc9..3e4ddbc373db80 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1447,7 +1447,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
if (ret)
return ret;
info->anagrpid = id->anagrpid;
- info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
+ info->is_shared = true;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = true;
if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
@@ -1483,7 +1483,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
if (!ret) {
info->anagrpid = id->anagrpid;
- info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
+ info->is_shared = true;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 9dbbd3a4398598..0cb8d5c853156e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -562,8 +562,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
* We also do this for private namespaces as the namespace sharing flag
* could change after a rescan.
*/
- if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
- !nvme_is_unique_nsid(ctrl, head) || !multipath)
+ if (!nvme_is_unique_nsid(ctrl, head) || !multipath)
return 0;
head->disk = blk_alloc_disk(ctrl->numa_node);
--
2.39.0
More information about the Linux-nvme
mailing list