[PATCH v2] nvme-fc: FPIN link integrity handling
hare at kernel.org
hare at kernel.org
Tue Apr 2 02:30:31 PDT 2024
From: Hannes Reinecke <hare at suse.de>
FPIN LI (link integrity) messages are received when the attached
fabric detects hardware errors. In response to these messages I/O
should be directed away from the affected ports, and only used
if the 'optimized' paths are unavailable.
Upon port reset the paths should be put back in service as the
affected hardware might have been replaced.
This patch adds a new controller flag 'NVME_CTRL_MARGINAL'
which will be checked during multipath path selection, causing the
path to be skipped when checking for 'optimized' paths. If no
optimized paths are available the 'marginal' paths are considered
for path selection alongside the 'non-optimized' paths.
Changes to the original submission:
- Changed flag name to 'marginal'
- Do not block marginal path; influence path selection instead
to de-prioritize marginal paths
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
drivers/nvme/host/core.c | 1 +
drivers/nvme/host/fc.c | 108 +++++++++++++++++++++++++++++++++
drivers/nvme/host/multipath.c | 17 ++++--
drivers/nvme/host/nvme.h | 6 ++
drivers/scsi/lpfc/lpfc_els.c | 6 +-
drivers/scsi/qla2xxx/qla_isr.c | 1 +
include/linux/nvme-fc-driver.h | 3 +
7 files changed, 135 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 57b664d12863..f5987a764a01 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4606,6 +4606,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
ctrl->passthru_err_log_enabled = false;
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+ clear_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 68a5d971657b..4602d5611d62 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -786,6 +786,9 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
+ /* clear 'marginal' flag as controller will be reset */
+ clear_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
+
switch (nvme_ctrl_state(&ctrl->ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
@@ -3739,6 +3742,111 @@ static struct nvmf_transport_ops nvme_fc_transport = {
.create_ctrl = nvme_fc_create_ctrl,
};
+static struct nvme_fc_rport *nvme_fc_rport_from_wwpn(struct nvme_fc_lport *lport,
+ u64 rport_wwpn)
+{
+ struct nvme_fc_rport *rport;
+
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
+ if (!nvme_fc_rport_get(rport))
+ continue;
+ if (rport->remoteport.port_name == rport_wwpn &&
+ rport->remoteport.port_role & FC_PORT_ROLE_NVME_TARGET)
+ return rport;
+ nvme_fc_rport_put(rport);
+ }
+ return NULL;
+}
+
+/*
+ * nvme_fc_fpin_li_lport_update - routine to update Link Integrity
+ * event statistics.
+ * @lport: local port the FPIN was received on
+ * @tlv: pointer to link integrity descriptor
+ *
+ */
+static void
+nvme_fc_fpin_li_lport_update(struct nvme_fc_lport *lport, struct fc_tlv_desc *tlv)
+{
+ unsigned int i, pname_count;
+ struct nvme_fc_rport *attached_rport;
+ struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv;
+ u64 wwpn;
+
+ wwpn = be64_to_cpu(li_desc->attached_wwpn);
+ attached_rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+ pname_count = be32_to_cpu(li_desc->pname_count);
+
+ for (i = 0; pname_count; i++) {
+ struct nvme_fc_rport *rport;
+
+ wwpn = be64_to_cpu(li_desc->pname_list[i]);
+ rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+ if (!rport)
+ continue;
+ if (rport != attached_rport) {
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock_irq(&rport->lock);
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+ set_bit(NVME_CTRL_MARGINAL, &ctrl->ctrl.flags);
+ spin_unlock_irq(&rport->lock);
+ }
+ nvme_fc_rport_put(rport);
+ }
+ if (attached_rport) {
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock_irq(&attached_rport->lock);
+ list_for_each_entry(ctrl, &attached_rport->ctrl_list, ctrl_list)
+ set_bit(NVME_CTRL_MARGINAL, &ctrl->ctrl.flags);
+ spin_unlock_irq(&attached_rport->lock);
+ nvme_fc_rport_put(attached_rport);
+ }
+}
+
+/**
+ * fc_host_fpin_rcv - routine to process a received FPIN.
+ * @localport: local port the FPIN was received on
+ * @fpin_len: length of FPIN payload, in bytes
+ * @fpin_buf: pointer to FPIN payload
+ * Notes:
+ * This routine assumes no locks are held on entry.
+ */
+void
+nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+ u32 fpin_len, char *fpin_buf)
+{
+ struct nvme_fc_lport *lport;
+ struct fc_els_fpin *fpin = (struct fc_els_fpin *)fpin_buf;
+ struct fc_tlv_desc *tlv;
+ u32 bytes_remain;
+ u32 dtag;
+
+ if (!localport)
+ return;
+ lport = localport_to_lport(localport);
+ tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0];
+ bytes_remain = fpin_len - offsetof(struct fc_els_fpin, fpin_desc);
+ bytes_remain = min_t(u32, bytes_remain, be32_to_cpu(fpin->desc_len));
+
+ while (bytes_remain >= FC_TLV_DESC_HDR_SZ &&
+ bytes_remain >= FC_TLV_DESC_SZ_FROM_LENGTH(tlv)) {
+ dtag = be32_to_cpu(tlv->desc_tag);
+ switch (dtag) {
+ case ELS_DTAG_LNK_INTEGRITY:
+ nvme_fc_fpin_li_lport_update(lport, tlv);
+ break;
+ default:
+ break;
+ }
+
+ bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
+ tlv = fc_tlv_next_desc(tlv);
+ }
+}
+EXPORT_SYMBOL(nvme_fc_fpin_rcv);
+
/* Arbitrary successive failures max. With lots of subsystems could be high */
#define DISCOVERY_MAX_FAIL 20
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 007d695bd607..fe7612b440ca 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -282,11 +282,14 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
- if (distance < found_distance) {
- found_distance = distance;
- found = ns;
+ if (!nvme_ctrl_is_marginal(ns->ctrl)) {
+ if (distance < found_distance) {
+ found_distance = distance;
+ found = ns;
+ }
+ break;
}
- break;
+ fallthrough;
case NVME_ANA_NONOPTIMIZED:
if (distance < fallback_distance) {
fallback_distance = distance;
@@ -334,7 +337,8 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
found = ns;
- goto out;
+ if (!nvme_ctrl_is_marginal(ns->ctrl))
+ goto out;
}
if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
found = ns;
@@ -361,7 +365,8 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
- ns->ana_state == NVME_ANA_OPTIMIZED;
+ ns->ana_state == NVME_ANA_OPTIMIZED &&
+ !nvme_ctrl_is_marginal(ns->ctrl);
}
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 83c3870d5ed0..400c65655bdf 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -258,6 +258,7 @@ enum nvme_ctrl_flags {
NVME_CTRL_SKIP_ID_CNS_CS = 4,
NVME_CTRL_DIRTY_CAPABILITY = 5,
NVME_CTRL_FROZEN = 6,
+ NVME_CTRL_MARGINAL = 7,
};
struct nvme_ctrl {
@@ -399,6 +400,11 @@ static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
return READ_ONCE(ctrl->state);
}
+static inline bool nvme_ctrl_is_marginal(struct nvme_ctrl *ctrl)
+{
+ return test_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
+}
+
enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 4d723200690a..ecfe6bc8ab63 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -33,6 +33,7 @@
#include <scsi/scsi_transport_fc.h>
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
+#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@@ -10343,9 +10344,12 @@ lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length)
fpin_length += sizeof(struct fc_els_fpin); /* the entire FPIN */
/* Send every descriptor individually to the upper layer */
- if (deliver)
+ if (deliver) {
fc_host_fpin_rcv(lpfc_shost_from_vport(vport),
fpin_length, (char *)fpin, 0);
+ nvme_fc_fpin_rcv(vport->localport,
+ fpin_length, (char *)fpin);
+ }
desc_cnt++;
}
}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d48007e18288..b180e10053c5 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -46,6 +46,7 @@ qla27xx_process_purex_fpin(struct scsi_qla_host *vha, struct purex_item *item)
pkt, pkt_size);
fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt, 0);
+ nvme_fc_fpin_rcv(vha->nvme_local_port, pkt_size, (char *)pkt);
}
const char *const port_state_str[] = {
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 89ea1ebd975a..b23d1e55737a 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -536,6 +536,9 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
u32 dev_loss_tmo);
+void nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+ u32 fpin_len, char *fpin_buf);
+
/*
* Routine called to pass a NVME-FC LS request, received by the lldd,
* to the nvme-fc transport.
--
2.35.3
More information about the Linux-nvme
mailing list