[PATCH] RISC-V: KVM: Fix TOCTOU race in SBI system suspend handler

Jiakai Xu xujiakai2025 at iscas.ac.cn
Thu May 21 07:20:30 PDT 2026


The SBI SUSP handler kvm_sbi_ext_susp_handler() checks that all other
vCPUs are stopped before entering system suspend, but it does not hold
mp_state_lock during the iteration. A concurrent HSM HART_START from
another vCPU can start a target vCPU after the SUSP handler has already
checked it, violating the invariant that all vCPUs must be stopped
before suspend.

Fix this with a two-phase approach:
1. Set a VM-wide suspend_in_progress flag before the iteration to block
   concurrent HSM HART_START. The HSM start handler checks this flag
   under its existing mp_state_lock, closing the race.
2. Hold mp_state_lock during each per-vCPU stopped check so that
   mp_state reads are ordered against concurrent power_on/power_off
   writes on the other side of the lock.

The flag is self-clearing: it resets when any vCPU re-enters
kvm_arch_vcpu_ioctl_run after the suspend-resume cycle completes.

Fixes: 023c15151fbb ("RISC-V: KVM: Add SBI system suspend support")
Signed-off-by: Jiakai Xu <jiakaiPeanut at gmail.com>
Signed-off-by: Jiakai Xu <xujiakai2025 at iscas.ac.cn>
Assisted-by: YuanSheng:DeepSeek-V3.2
---
 arch/riscv/include/asm/kvm_host.h |  3 +++
 arch/riscv/kvm/vcpu.c             |  8 ++++++++
 arch/riscv/kvm/vcpu_sbi_hsm.c     | 11 +++++++++++
 arch/riscv/kvm/vcpu_sbi_system.c  | 10 ++++++++++
 4 files changed, 32 insertions(+)

diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 75b0a951c1bc6..c4e710ec40f90 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -93,6 +93,9 @@ struct kvm_arch {
 
 	/* KVM_CAP_RISCV_MP_STATE_RESET */
 	bool mp_state_reset;
+
+	/* Set by SBI SUSP to block concurrent HSM HART_START during system suspend */
+	bool suspend_in_progress;
 };
 
 struct kvm_cpu_trap {
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index a73690eda84b5..ea6f14244addb 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -838,6 +838,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	/* Mark this VCPU ran at least once */
 	vcpu->arch.ran_atleast_once = true;
 
+	/*
+	 * Clear stale suspend flag from a previous suspend-resume cycle.
+	 * The flag was set by kvm_sbi_ext_susp_handler() and persists
+	 * across the userspace exit; clearing it here ensures subsequent
+	 * HSM HART_START operations are not blocked after resume.
+	 */
+	WRITE_ONCE(vcpu->kvm->arch.suspend_in_progress, false);
+
 	kvm_vcpu_srcu_read_lock(vcpu);
 
 	switch (run->exit_reason) {
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
index f26207f84bab6..2f88d93768bc8 100644
--- a/arch/riscv/kvm/vcpu_sbi_hsm.c
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -31,6 +31,17 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	/*
+	 * Reject HART_START while a system suspend is in progress.
+	 * kvm_sbi_ext_susp_handler() sets this flag before checking
+	 * that all vCPUs are stopped; checking it here under
+	 * mp_state_lock closes the race.
+	 */
+	if (READ_ONCE(target_vcpu->kvm->arch.suspend_in_progress)) {
+		ret = SBI_ERR_DENIED;
+		goto out;
+	}
+
 	kvm_riscv_vcpu_sbi_request_reset(target_vcpu, cp->a1, cp->a2);
 
 	__kvm_riscv_vcpu_power_on(target_vcpu);
diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c
index c6f7e609ac794..b79c1cff7a996 100644
--- a/arch/riscv/kvm/vcpu_sbi_system.c
+++ b/arch/riscv/kvm/vcpu_sbi_system.c
@@ -35,13 +35,23 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			return 0;
 		}
 
+		/*
+		 * Set the VM-wide flag to block concurrent HSM HART_START
+		 * from racing with the per-vCPU stopped checks below.
+		 */
+		WRITE_ONCE(vcpu->kvm->arch.suspend_in_progress, true);
+
 		kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
 			if (tmp == vcpu)
 				continue;
+			spin_lock(&tmp->arch.mp_state_lock);
 			if (!kvm_riscv_vcpu_stopped(tmp)) {
+				spin_unlock(&tmp->arch.mp_state_lock);
+				WRITE_ONCE(vcpu->kvm->arch.suspend_in_progress, false);
 				retdata->err_val = SBI_ERR_DENIED;
 				return 0;
 			}
+			spin_unlock(&tmp->arch.mp_state_lock);
 		}
 
 		kvm_riscv_vcpu_sbi_request_reset(vcpu, cp->a1, cp->a2);
-- 
2.34.1




More information about the linux-riscv mailing list