[PATCH] nvme: invalidate paths during rescan
Daniel Wagner
dwagner at suse.de
Thu Jul 29 12:28:14 PDT 2021
From: Hannes Reinecke <hare at suse.de>
When triggering a rescan due to a namespace resize we will be
receiving AENs on every controller, triggering a rescan of all
attached namespaces. If multipath is active, only the current path and
the ns_head disk will be updated; the other paths will still refer to
the old size until AENs for the remaining controllers are received.
If I/O comes in before that it might be routed to one of the old
paths, triggering an I/O failure with 'access beyond end of device'.
With this patch the old paths are skipped from multipath path
selection until the controller serving these paths has been rescanned.
Signed-off-by: Hannes Reinecke <hare at suse.de>
Tested-by: Daniel Wagner <dwagner at suse.de>
Signed-off-by: Daniel Wagner <dwagner at suse.de>
---
Hi,
I was able to reproduce the exact scenario Hannes describes.
My setup is
- iopolicy: round-robin
- add a 10 s sleep between the rescanning of each controller.
this increased the likelihood of getting I/O on both paths.
- increase the backing block device on the target (lvm)
- as there was no AEN, trigger rescan by 'nvme ns-rescan'
- monitor the two connections (tcp) via iptables on the
target
Here is some debug output from my session. I think you get the idea
where I placed the printks :)
nvme nvme17: rescanning namespaces.
nvme16n1: detected capacity change from 4603904 to 4808704
capacity = 4808704
get_capacity(ns->disk) = 4808704
get_capacity(ns->disk) = 4603904
ns 00000000d0596eba invalidated
nvme nvme16: rescanning namespaces.
capacity = 4808704
get_capacity(ns->disk) = 4808704
get_capacity(ns->disk) = 4808704
capacity = 4808704
get_capacity(ns->disk) = 4808704
get_capacity(ns->disk) = 4808704
Without the patch I reliably got 'access beyond' errors, which were
triggered by blkid. Also I could see the I/O traffic move to one connection
and, after the second rescan, the I/Os were evenly distributed
on the two connections again.
drivers/nvme/host/core.c | 2 ++
drivers/nvme/host/multipath.c | 16 ++++++++++++++++
drivers/nvme/host/nvme.h | 17 +++++++++++++++++
3 files changed, 35 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dfd9dec0c1f6..20a079083129 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1877,6 +1877,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
goto out_unfreeze;
}
+ clear_bit(NVME_NS_INVALIDATED, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) {
@@ -1888,6 +1889,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
nvme_update_disk_info(ns->head->disk, ns, id);
+ nvme_mpath_invalidate_paths(ns);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
blk_queue_update_readahead(ns->head->disk->queue);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3f32c5e86bfc..579020ae058d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -147,6 +147,21 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
mutex_unlock(&ctrl->scan_lock);
}
+void nvme_mpath_invalidate_paths(struct nvme_ns *ns)
+{
+ struct nvme_ns_head *head = ns->head;
+ sector_t capacity = get_capacity(head->disk);
+ int node;
+
+ for_each_node(node)
+ rcu_assign_pointer(head->current_path[node], NULL);
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (capacity != get_capacity(ns->disk))
+ set_bit(NVME_NS_INVALIDATED, &ns->flags);
+ }
+}
+
static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
/*
@@ -158,6 +173,7 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
ns->ctrl->state != NVME_CTRL_DELETING)
return true;
if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
+ test_bit(NVME_NS_INVALIDATED, &ns->flags) ||
test_bit(NVME_NS_REMOVING, &ns->flags))
return true;
return false;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5cd1fa3b8464..ea8933b55bba 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -467,6 +467,7 @@ struct nvme_ns {
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
+#define NVME_NS_INVALIDATED 4
struct cdev cdev;
struct device cdev_device;
@@ -715,8 +716,18 @@ void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_invalidate_paths(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
+struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+ struct nvme_ns_head *head = ns->head;
+
+ if (head->disk && list_empty(&head->list))
+ kblockd_schedule_work(&head->requeue_work);
+}
static inline void nvme_trace_bio_complete(struct request *req)
{
@@ -762,12 +773,18 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
return false;
}
+static inline void nvme_mpath_invalidate_paths(struct nvme_ns *ns)
+{
+}
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
{
}
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+}
static inline void nvme_trace_bio_complete(struct request *req)
{
}
--
2.29.2
More information about the Linux-nvme
mailing list