[PATCH 09/24] scsi-multipath: failover handling

John Garry john.g.garry at oracle.com
Wed Feb 25 07:36:12 PST 2026


For a scmd which suffers failover, requeue the master bio of each bio
attached to its request.

A handler is added to the scsi_driver structure to look up the
mpath_disk for a request. This is needed because the scsi_disk structure
will manage the mpath_disk, and the core code has no way to look it up
from the scsi_cmnd.

Failover occurs when the scsi_cmnd has failed and the original scsi_device
is found to have its transport down.

Signed-off-by: John Garry <john.g.garry at oracle.com>
---
 drivers/scsi/scsi_error.c     | 12 ++++++
 drivers/scsi/scsi_lib.c       |  9 +++-
 drivers/scsi/scsi_multipath.c | 80 +++++++++++++++++++++++++++++++++++
 include/scsi/scsi.h           |  1 +
 include/scsi/scsi_driver.h    |  3 ++
 include/scsi/scsi_multipath.h | 14 ++++++
 6 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f869108fd9693..0fd1b46764c3f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -40,6 +40,7 @@
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi_dh.h>
 #include <scsi/scsi_devinfo.h>
+#include <scsi/scsi_multipath.h>
 #include <scsi/sg.h>
 
 #include "scsi_priv.h"
@@ -1901,12 +1902,16 @@ bool scsi_noretry_cmd(struct scsi_cmnd *scmd)
 enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd)
 {
 	enum scsi_disposition rtn;
+	struct request *req = scsi_cmd_to_rq(scmd);
 
 	/*
 	 * if the device is offline, then we clearly just pass the result back
 	 * up to the top level.
 	 */
 	if (!scsi_device_online(scmd->device)) {
+		if (scsi_is_mpath_request(req))
+			return scsi_mpath_failover_disposition(scmd);
+
 		SCSI_LOG_ERROR_RECOVERY(5, scmd_printk(KERN_INFO, scmd,
 			"%s: device offline - report as SUCCESS\n", __func__));
 		return SUCCESS;
@@ -2070,6 +2075,13 @@ enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd)
 
 maybe_retry:
 
+	/*
+	 * For SCSI Multipath check if there are path errors to
+	 * trigger failover to available path
+	 */
+	if (scsi_is_mpath_request(req))
+		return scsi_mpath_failover_disposition(scmd);
+
 	/* we requeue for retry because the error was retryable, and
 	 * the request was not marked fast fail.  Note that above,
 	 * even if the request is marked fast fail, we still requeue
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ab224cd61f3ae..7ed0defc8161e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1550,7 +1550,7 @@ static void scsi_complete(struct request *rq)
 		atomic_inc(&cmd->device->ioerr_cnt);
 
 	disposition = scsi_decide_disposition(cmd);
-	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
+	if (disposition != SUCCESS && disposition != FAILOVER && scsi_cmd_runtime_exceeced(cmd))
 		disposition = SUCCESS;
 
 	scsi_log_completion(cmd, disposition);
@@ -1565,6 +1565,9 @@ static void scsi_complete(struct request *rq)
 	case ADD_TO_MLQUEUE:
 		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
 		break;
+	case FAILOVER:
+		scsi_mpath_failover_req(rq);
+		break;
 	default:
 		scsi_eh_scmd_add(cmd);
 		break;
@@ -1935,6 +1938,10 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
 		scsi_run_queue_async(sdev);
+		if (!scsi_device_online(sdev) && scsi_is_mpath_request(req)) {
+			scsi_mpath_failover_req(req);
+			return 0;
+		}
 		break;
 	}
 	return ret;
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index c3e0f792e921f..16b1f84fc552c 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -518,6 +518,86 @@ void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head)
 }
 EXPORT_SYMBOL_GPL(scsi_mpath_put_head);
 
+bool scsi_is_mpath_request(struct request *req)
+{
+	return is_mpath_request(req); /* SCSI-named wrapper, exported below */
+}
+EXPORT_SYMBOL_GPL(scsi_is_mpath_request);
+
+/*
+ * Append the master bio of every clone in the chain starting at @clone to
+ * @bl, then drop a reference on that clone. Masters are queued in the same
+ * order as their clones appear in the chain. A NULL @clone is a no-op.
+ *
+ * The chain is walked with a loop rather than by recursion so that stack
+ * usage does not grow with the number of bios in the request.
+ */
+static void bio_list_add_clone_master(struct bio_list *bl, struct bio *clone)
+{
+	while (clone) {
+		/* Read the link before bio_put() gives the clone up. */
+		struct bio *next = clone->bi_next;
+		struct bio *master_bio;
+
+		master_bio = scsi_mpath_to_master_bio(clone)->master_bio;
+		bio_list_add(bl, master_bio);
+		bio_put(clone);
+		clone = next;
+	}
+}
+
+/*
+ * Fail @req over: move the master bio of each attached clone bio onto the
+ * mpath_head requeue list, end @req with no data, and kick the requeue work.
+ */
+void scsi_mpath_failover_req(struct request *req)
+{
+	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
+	struct scsi_device *sdev = scmd->device;
+	struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver);
+	struct mpath_head *mpath_head = drv->to_mpath_disk(req)->mpath_head;
+	unsigned long flags;
+
+	scsi_mpath_dev_clear_path(sdev->scsi_mpath_dev);
+	spin_lock_irqsave(&mpath_head->requeue_lock, flags);
+	bio_list_add_clone_master(&mpath_head->requeue_list, req->bio);
+	spin_unlock_irqrestore(&mpath_head->requeue_lock, flags);
+
+	/* The bios now sit on the requeue list; end the request empty. */
+	req->bio = NULL;
+	req->biotail = NULL;
+	req->__data_len = 0;
+	scmd->result = 0;
+	blk_mq_end_request(req, 0);
+	kblockd_schedule_work(&mpath_head->requeue_work);
+}
+
+/*
+ * A failed command counts as a path error when its scsi_device is in the
+ * SDEV_TRANSPORT_OFFLINE state.
+ */
+static inline bool scsi_is_mpath_error(struct scsi_cmnd *scmd)
+{
+	return scmd->device->sdev_state == SDEV_TRANSPORT_OFFLINE;
+}
+
+/*
+ * Pick the disposition for a failed command: FAILOVER for a multipath
+ * request whose device is transport-offline or whose queue is dying,
+ * NEEDS_RETRY for any other multipath request, SUCCESS otherwise.
+ */
+int scsi_mpath_failover_disposition(struct scsi_cmnd *scmd)
+{
+	struct request *req = scsi_cmd_to_rq(scmd);
+
+	/* Not a multipath request: there is nothing to fail over. */
+	if (!is_mpath_request(req))
+		return SUCCESS;
+	if (scsi_is_mpath_error(scmd) || blk_queue_dying(req->q))
+		return FAILOVER;
+	return NEEDS_RETRY;
+}
+
 int __init scsi_multipath_init(void)
 {
 	return class_register(&scsi_mpath_device_class);
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 96b3503666703..544153a01b3fd 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -103,6 +103,7 @@ enum scsi_disposition {
 	TIMEOUT_ERROR		= 0x2007,
 	SCSI_RETURN_NOT_HANDLED	= 0x2008,
 	FAST_IO_FAIL		= 0x2009,
+	FAILOVER		= 0x2010,
 };
 
 /*
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index c0e89996bdb3f..85e792dc4db50 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -19,6 +19,9 @@ struct scsi_driver {
 	int (*done)(struct scsi_cmnd *);
 	int (*eh_action)(struct scsi_cmnd *, int);
 	void (*eh_reset)(struct scsi_cmnd *);
+	#ifdef CONFIG_SCSI_MULTIPATH
+	struct mpath_disk *(*to_mpath_disk)(struct request *);
+	#endif
 };
 #define to_scsi_driver(drv) \
 	container_of((drv), struct scsi_driver, gendrv)
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index 79e6860243e74..07db217edb085 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -43,6 +43,9 @@ struct scsi_mpath_device {
 #define to_scsi_mpath_device(d) \
 	container_of(d, struct scsi_mpath_device, mpath_device)
 
+void scsi_mpath_failover_req(struct request *);
+int scsi_mpath_failover_disposition(struct scsi_cmnd *);
+bool scsi_is_mpath_request(struct request *req);
 int scsi_mpath_dev_alloc(struct scsi_device *sdev);
 void scsi_mpath_dev_release(struct scsi_device *sdev);
 int scsi_multipath_init(void);
@@ -60,6 +63,17 @@ struct scsi_mpath_head {
 struct scsi_mpath_device {
 };
 
+static inline void scsi_mpath_failover_req(struct request *req)
+{
+}
+static inline int scsi_mpath_failover_disposition(struct scsi_cmnd *scmd)
+{
+	return 0;
+}
+static inline bool scsi_is_mpath_request(struct request *req)
+{
+	return false;
+}
 static inline int scsi_mpath_dev_alloc(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5




More information about the Linux-nvme mailing list