[PATCH] nvme-multipath: add an 'ana_groups_only' module option

Hannes Reinecke hare at suse.de
Mon Feb 7 02:00:05 PST 2022


On large installations the ANA log buffer can be exceedingly large;
we've come across a controller with 49 ANA Group Descriptors and
65536 namespaces, resulting in an ANA buffer with an order-7 allocation.
And this is just to validate that the namespace ID is _really_ listed
in the log page.
So to avoid an overly large memory allocation we can set the 'RGO'
(Return Groups Only) bit when retrieving the ANA log page, and check
whether the ANA group ID from the namespace is found in the ANA
descriptors. That cuts down the memory allocation, and provides the
same result.
But to be on the safe side I've added a module option 'ana_groups_only'
to switch between modes.

Signed-off-by: Hannes Reinecke <hare at suse.de>
---
 drivers/nvme/host/multipath.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 7f2071f2460c..bffa56c4fc83 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -13,6 +13,11 @@ module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
 	"turn on native support for multiple controllers per subsystem");
 
+static bool ana_groups_only = false;
+module_param(ana_groups_only, bool, 0644);
+MODULE_PARM_DESC(ana_groups_only,
+		 "Retrieve ANA Log page with groups only (RGO bit set)");
+
 void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
 {
 	struct nvme_ns_head *h;
@@ -556,13 +561,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
 	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
 		struct nvme_ana_group_desc *desc = base + offset;
 		u32 nr_nsids;
-		size_t nsid_buf_size;
+		size_t nsid_buf_size = 0;
 
 		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
 			return -EINVAL;
 
 		nr_nsids = le32_to_cpu(desc->nnsids);
-		nsid_buf_size = flex_array_size(desc, nsids, nr_nsids);
+		if (nr_nsids)
+			nsid_buf_size = flex_array_size(desc, nsids, nr_nsids);
 
 		if (WARN_ON_ONCE(desc->grpid == 0))
 			return -EINVAL;
@@ -617,8 +623,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	if (desc->state == NVME_ANA_CHANGE)
 		(*nr_change_groups)++;
 
-	if (!nr_nsids)
+	if (!nr_nsids) {
+		if (!ana_groups_only)
+			return 0;
+		down_read(&ctrl->namespaces_rwsem);
+		list_for_each_entry(ns, &ctrl->namespaces, list) {
+			if (ns->ana_grpid == le32_to_cpu(desc->grpid))
+				nvme_update_ns_ana_state(desc, ns);
+		}
+		up_read(&ctrl->namespaces_rwsem);
 		return 0;
+	}
 
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
@@ -644,7 +659,8 @@ static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
 	int error;
 
 	mutex_lock(&ctrl->ana_lock);
-	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, NVME_CSI_NVM,
+	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
+			ana_groups_only ? NVME_ANA_LOG_RGO : 0, NVME_CSI_NVM,
 			ctrl->ana_log_buf, ctrl->ana_log_size, 0);
 	if (error) {
 		dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
@@ -855,8 +871,10 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
 
 	ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
-		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
-		ctrl->max_namespaces * sizeof(__le32);
+		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
+	if (!ana_groups_only)
+		ana_log_size += ctrl->max_namespaces * sizeof(__le32);
+
 	if (ana_log_size > max_transfer_size) {
 		dev_err(ctrl->device,
 			"ANA log page size (%zd) larger than MDTS (%zd).\n",
-- 
2.29.2




More information about the Linux-nvme mailing list