[PATCH] nvme: bound the freeze drain in passthrough commands

Chao Shi coshi036 at gmail.com
Tue May 26 22:59:23 PDT 2026


nvme_passthru_start() drains in-flight I/O via the unbounded
nvme_wait_freeze() before submitting a command with command-set
effects (Format NVM, Sanitize, Namespace Management, vendor unique).
If a completion is silently dropped or the device hangs, the calling
task wedges with ctrl->scan_lock and ctrl->subsys->lock held, fanning
out into hung-task reports on any concurrent open/close/passthru on
the same controller:

  INFO: task syz-executor:NNNN blocked for more than 123 seconds.
   nvme_wait_freeze+0x82/0x100
   nvme_passthru_start drivers/nvme/host/core.c:1249 [inline]
   nvme_submit_user_cmd+0x1ee/0x3d0 drivers/nvme/host/ioctl.c:189

The other freeze-drain sites (pci shutdown, tcp/rdma reset) already
bound the wait with nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT).
Apply it here too; on timeout, unfreeze, release scan_lock and the
subsys lock, and return -EBUSY instead of submitting the command (the
nvmet path completes the request with NVME_SC_INTERNAL).

Found by FuzzNvme (Syzkaller with the FEMU fuzzing framework).

Acked-by: Sungwoo Kim <iam at sung-woo.kim>
Acked-by: Dave Tian <daveti at purdue.edu>
Acked-by: Weidong Zhu <weizhu at fiu.edu>
Signed-off-by: Chao Shi <coshi036 at gmail.com>
---
 drivers/nvme/host/core.c       | 26 ++++++++++++++++++++------
 drivers/nvme/host/ioctl.c      |  7 ++++++-
 drivers/nvme/host/nvme.h       |  3 ++-
 drivers/nvme/target/passthru.c |  7 ++++++-
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7bf228df6001..575f98b9a6cc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1232,23 +1232,37 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
 }
 EXPORT_SYMBOL_NS_GPL(nvme_command_effects, "NVME_TARGET_PASSTHRU");
 
-u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+int nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode,
+			u32 *effects)
 {
-	u32 effects = nvme_command_effects(ctrl, ns, opcode);
+	*effects = nvme_command_effects(ctrl, ns, opcode);
 
 	/*
 	 * For simplicity, IO to all namespaces is quiesced even if the command
-	 * effects say only one namespace is affected.
+	 * effects say only one namespace is affected.  Bound the drain wait so
+	 * a stuck I/O cannot wedge the passthrough caller (and any task on the
+	 * scan_lock or subsys lock) indefinitely; the other in-tree callers of
+	 * the freeze drain (pci shutdown, tcp/rdma reset) already use this same
+	 * NVME_IO_TIMEOUT bound.
 	 */
-	if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
+	if (*effects & NVME_CMD_EFFECTS_CSE_MASK) {
 		mutex_lock(&ctrl->scan_lock);
 		mutex_lock(&ctrl->subsys->lock);
 		nvme_mpath_start_freeze(ctrl->subsys);
 		nvme_mpath_wait_freeze(ctrl->subsys);
 		nvme_start_freeze(ctrl);
-		nvme_wait_freeze(ctrl);
+		if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+			dev_warn(ctrl->device,
+				 "I/O did not drain in %u seconds; aborting passthrough\n",
+				 nvme_io_timeout);
+			nvme_unfreeze(ctrl);
+			nvme_mpath_unfreeze(ctrl->subsys);
+			mutex_unlock(&ctrl->subsys->lock);
+			mutex_unlock(&ctrl->scan_lock);
+			return -EBUSY;
+		}
 	}
-	return effects;
+	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, "NVME_TARGET_PASSTHRU");
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index a9c097dacad6..762458a23b38 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -186,7 +186,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	bio = req->bio;
 	ctrl = nvme_req(req)->ctrl;
 
-	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
+	ret = nvme_passthru_start(ctrl, ns, cmd->common.opcode, &effects);
+	if (ret) {
+		if (bio)
+			blk_rq_unmap_user(bio);
+		goto out_free_req;
+	}
 	ret = nvme_execute_rq(req, false);
 	if (result)
 		*result = le64_to_cpu(nvme_req(req)->result.u64);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9a5f28c5103c..665d75de044e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -1211,7 +1211,8 @@ static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {};
 
 u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			 u8 opcode);
-u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
+int nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode,
+			u32 *effects);
 int nvme_execute_rq(struct request *rq, bool at_head);
 void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
 		       struct nvme_command *cmd, int status);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 67c423a8b052..7b97bfc1ace6 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -220,7 +220,12 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
 	u32 effects;
 	int status;
 
-	effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode);
+	status = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode, &effects);
+	if (status) {
+		nvmet_req_complete(req, NVME_SC_INTERNAL);
+		blk_mq_free_request(rq);
+		return;
+	}
 	status = nvme_execute_rq(rq, false);
 	if (status == NVME_SC_SUCCESS &&
 	    req->cmd->common.opcode == nvme_admin_identify) {
-- 
2.43.0




More information about the Linux-nvme mailing list