[PATCH] KVM: arm64: Fix ICV_DIR_EL1 trapping detection for pKVM

Marc Zyngier maz at kernel.org
Mon Mar 9 10:33:46 PDT 2026


On Mon, 09 Mar 2026 16:04:50 +0000,
Vincent Donnefort <vdonnefort at google.com> wrote:
> 
> For non-VHE KVM, can_trap_icv_dir_el1() relies on a hyp-stub HVC to
> read the ICH_VTR_EL2 register. This isn't compatible with pKVM enabled
> devices which are failing late calls to verify_local_cpu_caps() when
> hotplugging a CPU.
> 
> In verify_local_cpu_caps(), system_has_cap, initialised before pKVM kills
> the hyp-stub, is most likely set, while cpu_has_cap fails to probe the
> feature. This creates a capability conflict and prevents the CPU from
> going online.
> 
> Add an HVC to get the ICH_VTR_EL2 register and use it for ICV_DIR_EL1
> trapping detection.
> 
> Signed-off-by: Vincent Donnefort <vdonnefort at google.com>
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index a1ad12c72ebf..81bac8faec44 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -81,6 +81,7 @@ enum __kvm_host_smccc_func {
>  	__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
>  	__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
>  	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
> +	__KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2,
>  	__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
>  	__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
>  	__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index c31f8e17732a..0bca57c1cbe0 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2345,13 +2345,17 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
>  	    !is_midr_in_range_list(has_vgic_v3))
>  		return false;
>  
> -	if (is_kernel_in_hyp_mode())
> +	if (is_kernel_in_hyp_mode()) {
>  		res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
> -	else
> +	} else if (system_capabilities_finalized() && is_protected_kvm_enabled()) {
> +		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2), &res);
> +		if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
> +			return false;
> +	} else {
>  		arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
> -
> -	if (res.a0 == HVC_STUB_ERR)
> -		return false;
> +		if (res.a0 == HVC_STUB_ERR)
> +			return false;
> +	}
>  
>  	return res.a1 & ICH_VTR_EL2_TDS;
>  }
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index e7790097db93..0432852228f9 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -463,6 +463,11 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
>  	cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
>  }
>  
> +static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
> +{
> +	cpu_reg(host_ctxt, 1) = read_sysreg_s(SYS_ICH_VTR_EL2);
> +}
> +
>  static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
>  {
>  	__vgic_v3_init_lrs();
> @@ -622,6 +627,7 @@ static const hcall_t host_hcall[] = {
>  	HANDLE_FUNC(__kvm_timer_set_cntvoff),
>  	HANDLE_FUNC(__vgic_v3_save_aprs),
>  	HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
> +	HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
>  	HANDLE_FUNC(__pkvm_reserve_vm),
>  	HANDLE_FUNC(__pkvm_unreserve_vm),
>  	HANDLE_FUNC(__pkvm_init_vm),
> 

This looks incredibly complicated. Since pKVM forbids late onlining of
CPUs, you are absolutely sure that you have already seen the CPU being
hot-plugged on.

So it would make a lot more sense to just return the current value of
the property you are trying to re-evaluate: you know for sure it
cannot change under your feet.

I have quickly tested the following hack:

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c31f8e17732a3..947ff71b3b66b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2345,6 +2345,9 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
 	    !is_midr_in_range_list(has_vgic_v3))
 		return false;
 
+	if (system_capabilities_finalized() && is_protected_kvm_enabled())
+		return cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR);
+
 	if (is_kernel_in_hyp_mode())
 		res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
 	else

which works for me. Could you please give it a go?

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.



More information about the linux-arm-kernel mailing list