[PATCH] nvme: Don't fail to resume if NSIDs change

Mario Limonciello mario.limonciello at amd.com
Mon Jul 31 11:51:03 PDT 2023


Samsung PM9B1 has problems after resume because NSID has changed.
This has been reported in the past on OEM varieties of PM9B1 parts
and fixed by firmware updates on 'some' of those parts.

However this same issue also happens on 'retail' PM9B1 parts which
Samsung has not released firmware updates for.

As the check has been relaxed at startup for multiple disks with
duplicate NSIDs with commit ac522fc6c3165 ("nvme: don't reject
probe due to duplicate IDs for single-ported PCIe devices"), also
relax the check that runs on resume for NSIDs and mark them bogus
if this occurs on resume.

Fixes: 1d5df6af8c74 ("nvme: don't blindly overwrite identifiers on disk revalidate")
Cc: stable at vger.kernel.org # 6.1+
Cc: Nils Kruse <nilskruse97 at gmail.com>
Cc: August Wikerfors <git at augustwikerfors.se>
Cc: David Chang <David.Chang at amd.com>
Link: https://github.com/tomsom/yoga-linux/issues/9
Link: https://lore.kernel.org/linux-nvme/b99a5149-c3d6-2a9b-1298-576a1b4b22c1@gmail.com/
Link: https://lore.kernel.org/all/20221116171727.4083-1-git@augustwikerfors.se/t/
Link: https://lore.kernel.org/all/d0ce0f3b-9407-9207-73a4-3536f0948653@augustwikerfors.se/
Signed-off-by: Mario Limonciello <mario.limonciello at amd.com>
---
 drivers/nvme/host/core.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37b6fa7466620..fc85b4cd11fa2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3423,6 +3423,16 @@ static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
 	return ret;
 }
 
+static void nvme_mark_nid_bogus(struct nvme_ns *ns, struct nvme_ns_info *info)
+{
+	dev_warn(ns->ctrl->device,
+		 "use of /dev/disk/by-id/ may cause data corruption\n");
+	memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
+	memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
+	memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
+	ns->ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
+}
+
 static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
@@ -3459,12 +3469,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 
 		dev_err(ctrl->device,
 			"clearing duplicate IDs for nsid %d\n", info->nsid);
-		dev_err(ctrl->device,
-			"use of /dev/disk/by-id/ may cause data corruption\n");
-		memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
-		memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
-		memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
-		ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
+		nvme_mark_nid_bogus(ns, info);
 	}
 
 	mutex_lock(&ctrl->subsys->lock);
@@ -3706,14 +3711,14 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
 {
 	int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
 
-	if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
+	if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids) &&
+	    !(ns->ctrl->quirks & NVME_QUIRK_BOGUS_NID)) {
 		dev_err(ns->ctrl->device,
 			"identifiers changed for nsid %d\n", ns->head->ns_id);
-		goto out;
+		nvme_mark_nid_bogus(ns, info);
 	}
 
 	ret = nvme_update_ns_info(ns, info);
-out:
 	/*
 	 * Only remove the namespace if we got a fatal error back from the
 	 * device, otherwise ignore the error and just move on.
-- 
2.34.1




More information about the Linux-nvme mailing list