[PATCH v3 1/2] RISC-V: KVM: add KVM_CAP_RISCV_MP_STATE_RESET

Anup Patel anup at brainfault.org
Fri May 16 05:25:05 PDT 2025


On Thu, May 15, 2025 at 8:22 PM Radim Krčmář <rkrcmar at ventanamicro.com> wrote:
>
> Add a toggleable VM capability to reset the VCPU from userspace by
> setting MP_STATE_INIT_RECEIVED through IOCTL.
>
> Reset through a mp_state to avoid adding a new IOCTL.
> Do not reset on a transition from STOPPED to RUNNABLE, because it's
> better to avoid side effects that would complicate userspace adoption.
> The MP_STATE_INIT_RECEIVED is not a permanent mp_state -- IOCTL resets
> the VCPU while preserving the original mp_state -- because we wouldn't
> gain much from having a new state in the rest of KVM, but it's a very
> non-standard use of the IOCTL.
>
> Signed-off-by: Radim Krčmář <rkrcmar at ventanamicro.com>
> ---
> If we want a permanent mp_state, I think that MP_STATE_UNINITIALIZED
> would be reasonable.  KVM could reset on transition to any other state.

Yes, MP_STATE_UNINITIALIZED looks better. I also suggest
that VCPU should be reset when set_mpstate() is called with
MP_STATE_UNINITIALIZED and the current state is
MP_STATE_STOPPED.

Regards,
Anup

>
> v3: do not allow userspace to set the HSM reset state [Anup]
> v2: new
> ---
>  Documentation/virt/kvm/api.rst        | 11 +++++++++++
>  arch/riscv/include/asm/kvm_host.h     |  3 +++
>  arch/riscv/include/asm/kvm_vcpu_sbi.h |  1 +
>  arch/riscv/kvm/vcpu.c                 | 27 ++++++++++++++-------------
>  arch/riscv/kvm/vcpu_sbi.c             | 17 +++++++++++++++++
>  arch/riscv/kvm/vm.c                   | 13 +++++++++++++
>  include/uapi/linux/kvm.h              |  1 +
>  7 files changed, 60 insertions(+), 13 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 47c7c3f92314..e107694fb41f 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -8496,6 +8496,17 @@ aforementioned registers before the first KVM_RUN. These registers are VM
>  scoped, meaning that the same set of values are presented on all vCPUs in a
>  given VM.
>
> +7.43 KVM_CAP_RISCV_MP_STATE_RESET
> +---------------------------------
> +
> +:Architectures: riscv
> +:Type: VM
> +:Parameters: None
> +:Returns: 0 on success, -EINVAL if flags is not zero
> +
> +When this capability is enabled, KVM resets the VCPU when setting
> +MP_STATE_INIT_RECEIVED through IOCTL.  The original MP_STATE is preserved.
> +
>  8. Other capabilities.
>  ======================
>
> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> index f673ebfdadf3..85cfebc32e4c 100644
> --- a/arch/riscv/include/asm/kvm_host.h
> +++ b/arch/riscv/include/asm/kvm_host.h
> @@ -119,6 +119,9 @@ struct kvm_arch {
>
>         /* AIA Guest/VM context */
>         struct kvm_aia aia;
> +
> +       /* KVM_CAP_RISCV_MP_STATE_RESET */
> +       bool mp_state_reset;
>  };
>
>  struct kvm_cpu_trap {
> diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
> index da28235939d1..439ab2b3534f 100644
> --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
> +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
> @@ -57,6 +57,7 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
>                                      u32 type, u64 flags);
>  void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu,
>                                       unsigned long pc, unsigned long a1);
> +void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu);
>  int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
>  int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
>                                    const struct kvm_one_reg *reg);
> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> index a78f9ec2fa0e..521cd41bfffa 100644
> --- a/arch/riscv/kvm/vcpu.c
> +++ b/arch/riscv/kvm/vcpu.c
> @@ -51,11 +51,11 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
>                        sizeof(kvm_vcpu_stats_desc),
>  };
>
> -static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu)
> +static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu,
> +                                        bool kvm_sbi_reset)
>  {
>         struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
>         struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
> -       struct kvm_vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
>         void *vector_datap = cntx->vector.datap;
>
>         memset(cntx, 0, sizeof(*cntx));
> @@ -65,13 +65,8 @@ static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu)
>         /* Restore datap as it's not a part of the guest context. */
>         cntx->vector.datap = vector_datap;
>
> -       /* Load SBI reset values */
> -       cntx->a0 = vcpu->vcpu_id;
> -
> -       spin_lock(&reset_state->lock);
> -       cntx->sepc = reset_state->pc;
> -       cntx->a1 = reset_state->a1;
> -       spin_unlock(&reset_state->lock);
> +       if (kvm_sbi_reset)
> +               kvm_riscv_vcpu_sbi_load_reset_state(vcpu);
>
>         /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
>         cntx->sstatus = SR_SPP | SR_SPIE;
> @@ -84,7 +79,7 @@ static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu)
>         csr->scounteren = 0x7;
>  }
>
> -static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
> +static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset)
>  {
>         bool loaded;
>
> @@ -100,7 +95,7 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
>
>         vcpu->arch.last_exit_cpu = -1;
>
> -       kvm_riscv_vcpu_context_reset(vcpu);
> +       kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset);
>
>         kvm_riscv_vcpu_fp_reset(vcpu);
>
> @@ -177,7 +172,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>         kvm_riscv_vcpu_sbi_init(vcpu);
>
>         /* Reset VCPU */
> -       kvm_riscv_reset_vcpu(vcpu);
> +       kvm_riscv_reset_vcpu(vcpu, false);
>
>         return 0;
>  }
> @@ -526,6 +521,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
>         case KVM_MP_STATE_STOPPED:
>                 __kvm_riscv_vcpu_power_off(vcpu);
>                 break;
> +       case KVM_MP_STATE_INIT_RECEIVED:
> +               if (vcpu->kvm->arch.mp_state_reset)
> +                       kvm_riscv_reset_vcpu(vcpu, false);
> +               else
> +                       ret = -EINVAL;
> +               break;
>         default:
>                 ret = -EINVAL;
>         }
> @@ -714,7 +715,7 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
>                 }
>
>                 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
> -                       kvm_riscv_reset_vcpu(vcpu);
> +                       kvm_riscv_reset_vcpu(vcpu, true);
>
>                 if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
>                         kvm_riscv_gstage_update_hgatp(vcpu);
> diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
> index 0afef0bb261d..31fd3cc98d66 100644
> --- a/arch/riscv/kvm/vcpu_sbi.c
> +++ b/arch/riscv/kvm/vcpu_sbi.c
> @@ -167,6 +167,23 @@ void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu,
>         kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
>  }
>
> +void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> +       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
> +       struct kvm_vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
> +
> +       cntx->a0 = vcpu->vcpu_id;
> +
> +       spin_lock(&vcpu->arch.reset_state.lock);
> +       cntx->sepc = reset_state->pc;
> +       cntx->a1 = reset_state->a1;
> +       spin_unlock(&vcpu->arch.reset_state.lock);
> +
> +       cntx->sstatus &= ~SR_SIE;
> +       csr->vsatp = 0;
> +}
> +
>  int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
>         struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
> index 7396b8654f45..b27ec8f96697 100644
> --- a/arch/riscv/kvm/vm.c
> +++ b/arch/riscv/kvm/vm.c
> @@ -209,6 +209,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>         return r;
>  }
>
> +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +       switch (cap->cap) {
> +       case KVM_CAP_RISCV_MP_STATE_RESET:
> +               if (cap->flags)
> +                       return -EINVAL;
> +               kvm->arch.mp_state_reset = true;
> +               return 0;
> +       default:
> +               return -EINVAL;
> +       }
> +}
> +
>  int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>  {
>         return -EINVAL;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index b6ae8ad8934b..454b7d4a0448 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -930,6 +930,7 @@ struct kvm_enable_cap {
>  #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
>  #define KVM_CAP_X86_GUEST_MODE 238
>  #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
> +#define KVM_CAP_RISCV_MP_STATE_RESET 240
>
>  struct kvm_irq_routing_irqchip {
>         __u32 irqchip;
> --
> 2.49.0
>



More information about the kvm-riscv mailing list