[PATCH] KVM: arm64: Fix ICV_DIR_EL1 trapping detection for pKVM
Marc Zyngier
maz at kernel.org
Mon Mar 9 10:33:46 PDT 2026
On Mon, 09 Mar 2026 16:04:50 +0000,
Vincent Donnefort <vdonnefort at google.com> wrote:
>
> For non-VHE KVM, can_trap_icv_dir_el1() relies on a hyp-stub HVC to
> read the ICH_VTR_EL2 register. This isn't compatible with pKVM-enabled
> devices, which fail late calls to verify_local_cpu_caps() when
> hotplugging a CPU.
>
> In verify_local_cpu_caps(), system_has_cap, initialised before pKVM kills
> the hyp-stub, is most likely set, while cpu_has_cap fails to probe the
> feature. This creates a capability conflict and prevents the CPU from
> going online.
>
> Add an HVC to get the ICH_VTR_EL2 register and use it for ICV_DIR_EL1
> trapping detection.
>
> Signed-off-by: Vincent Donnefort <vdonnefort at google.com>
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index a1ad12c72ebf..81bac8faec44 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -81,6 +81,7 @@ enum __kvm_host_smccc_func {
> __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
> __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
> __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
> + __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2,
> __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index c31f8e17732a..0bca57c1cbe0 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2345,13 +2345,17 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
> !is_midr_in_range_list(has_vgic_v3))
> return false;
>
> - if (is_kernel_in_hyp_mode())
> + if (is_kernel_in_hyp_mode()) {
> res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
> - else
> + } else if (system_capabilities_finalized() && is_protected_kvm_enabled()) {
> + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2), &res);
> + if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
> + return false;
> + } else {
> arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
> -
> - if (res.a0 == HVC_STUB_ERR)
> - return false;
> + if (res.a0 == HVC_STUB_ERR)
> + return false;
> + }
>
> return res.a1 & ICH_VTR_EL2_TDS;
> }
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index e7790097db93..0432852228f9 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -463,6 +463,11 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
> cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
> }
>
> +static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
> +{
> + cpu_reg(host_ctxt, 1) = read_sysreg_s(SYS_ICH_VTR_EL2);
> +}
> +
> static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
> {
> __vgic_v3_init_lrs();
> @@ -622,6 +627,7 @@ static const hcall_t host_hcall[] = {
> HANDLE_FUNC(__kvm_timer_set_cntvoff),
> HANDLE_FUNC(__vgic_v3_save_aprs),
> HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
> + HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
> HANDLE_FUNC(__pkvm_reserve_vm),
> HANDLE_FUNC(__pkvm_unreserve_vm),
> HANDLE_FUNC(__pkvm_init_vm),
>
This looks incredibly complicated. Since pKVM forbids late onlining of
CPUs, you can be absolutely sure that you have already seen the CPU that
is being hot-plugged back in.
So it would make a lot more sense to just return the current value of
the property you are trying to re-evaluate: you know for sure it
cannot change under your feet.
I have quickly tested the following hack:
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c31f8e17732a3..947ff71b3b66b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2345,6 +2345,9 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
!is_midr_in_range_list(has_vgic_v3))
return false;
+ if (system_capabilities_finalized() && is_protected_kvm_enabled())
+ return cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR);
+
if (is_kernel_in_hyp_mode())
res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
else
which works for me. Could you please give it a go?
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list