[PATCH 13/30] KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page()
Vincent Donnefort
vdonnefort at google.com
Tue Jan 6 08:26:44 PST 2026
On Mon, Jan 05, 2026 at 03:49:21PM +0000, Will Deacon wrote:
> To enable reclaim of pages from a protected VM during teardown,
> introduce a new hypercall to reclaim a single page from a protected
> guest that is in the dying state.
>
> Since the EL2 code is non-preemptible, the new hypercall deliberately
> acts on a single page at a time, allowing EL1 to reschedule frequently
> during the teardown operation.
>
> Co-developed-by: Quentin Perret <qperret at google.com>
> Signed-off-by: Quentin Perret <qperret at google.com>
> Signed-off-by: Will Deacon <will at kernel.org>
Reviewed-by: Vincent Donnefort <vdonnefort at google.com>
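
FWIW, since the hypercall operates on a single page, I'd expect the EL1
side to drive it from a loop along these lines (a rough sketch only --
pkvm_reclaim_dying_guest_memory() and its error handling are
illustrative, not from this series):

  static void pkvm_reclaim_dying_guest_memory(struct kvm *kvm, u64 gfn,
  					      u64 nr_pages)
  {
  	u64 end = gfn + nr_pages;

  	for (; gfn < end; gfn++) {
  		/*
  		 * One page per hypercall keeps each non-preemptible
  		 * EL2 section short; a failure (e.g. an unmapped gfn)
  		 * is simply skipped in this sketch.
  		 */
  		kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
  				  kvm->arch.pkvm.handle, gfn);

  		/* EL1 stays preemptible between the per-page calls. */
  		cond_resched();
  	}
  }
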
> ---
> arch/arm64/include/asm/kvm_asm.h | 1 +
> arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
> arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 +
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 +++
> arch/arm64/kvm/hyp/nvhe/mem_protect.c | 79 +++++++++++++++++++
> arch/arm64/kvm/hyp/nvhe/pkvm.c | 14 ++++
> 6 files changed, 105 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index cad3ba5e1c5a..f14f845aeedd 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -86,6 +86,7 @@ enum __kvm_host_smccc_func {
> __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
> + __KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
> __KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> index 9c0cc53d1dc9..cde38a556049 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> @@ -41,6 +41,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
> int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
> int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
> int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
> +int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
> int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
> enum kvm_pgtable_prot prot);
> int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
> index 04c7ca703014..506831804f64 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
> @@ -74,6 +74,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
> int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
> unsigned long vcpu_hva);
>
> +int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
> int __pkvm_start_teardown_vm(pkvm_handle_t handle);
> int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index a5ee1103ce1f..b1940e639ad3 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -570,6 +570,14 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
> cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
> }
>
> +static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
> +{
> + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
> + DECLARE_REG(u64, gfn, host_ctxt, 2);
> +
> + cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn);
> +}
> +
> static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
> {
> DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
> @@ -622,6 +630,7 @@ static const hcall_t host_hcall[] = {
> HANDLE_FUNC(__pkvm_unreserve_vm),
> HANDLE_FUNC(__pkvm_init_vm),
> HANDLE_FUNC(__pkvm_init_vcpu),
> + HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
> HANDLE_FUNC(__pkvm_start_teardown_vm),
> HANDLE_FUNC(__pkvm_finalize_teardown_vm),
> HANDLE_FUNC(__pkvm_vcpu_load),
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index ae126ab9febf..edbfe0e3dc58 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -725,6 +725,32 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
> return check_page_state_range(&vm->pgt, addr, size, &d);
> }
>
> +static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
> +{
> + kvm_pte_t pte;
> + u64 phys;
> + s8 level;
> + int ret;
> +
> + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
> + if (ret)
> + return ret;
> + if (!kvm_pte_valid(pte))
> + return -ENOENT;
> + if (level != KVM_PGTABLE_LAST_LEVEL)
> + return -E2BIG;
> +
> + phys = kvm_pte_to_phys(pte);
> + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
> + if (WARN_ON(ret))
> + return ret;
> +
> + *ptep = pte;
> + *physp = phys;
> +
> + return 0;
> +}
> +
> int __pkvm_host_share_hyp(u64 pfn)
> {
> u64 phys = hyp_pfn_to_phys(pfn);
> @@ -958,6 +984,59 @@ static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *s
> return 0;
> }
>
> +static void hyp_poison_page(phys_addr_t phys)
> +{
> + void *addr = hyp_fixmap_map(phys);
> +
> + memset(addr, 0, PAGE_SIZE);
> + /*
> + * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
> + * here as the latter may elide the CMO under the assumption that FWB
> + * will be enabled on CPUs that support it. This is incorrect for the
> + * host stage-2 and would otherwise lead to a malicious host potentially
> + * being able to read the contents of newly reclaimed guest pages.
> + */
> + kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
> + hyp_fixmap_unmap();
> +}
> +
> +int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
> +{
> + u64 ipa = hyp_pfn_to_phys(gfn);
> + kvm_pte_t pte;
> + u64 phys;
> + int ret;
> +
> + host_lock_component();
> + guest_lock_component(vm);
> +
> + ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
> + if (ret)
> + goto unlock;
> +
> + switch (guest_get_page_state(pte, ipa)) {
> + case PKVM_PAGE_OWNED:
> + WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
> + hyp_poison_page(phys);
> + break;
> + case PKVM_PAGE_SHARED_OWNED:
> + WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
> + break;
> + default:
> + ret = -EPERM;
> + goto unlock;
> + }
> +
> + WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
> + WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
> +
> +unlock:
> + guest_unlock_component(vm);
> + host_unlock_component();
> +
> + return ret;
> +}
> +
> int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
> {
> struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
> diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> index 7f8191f96fc3..9f0997150cf5 100644
> --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> @@ -832,6 +832,20 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
> unmap_donated_memory_noclear(addr, size);
> }
>
> +int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn)
> +{
> + struct pkvm_hyp_vm *hyp_vm;
> + int ret = -EINVAL;
> +
> + hyp_spin_lock(&vm_table_lock);
> + hyp_vm = get_vm_by_handle(handle);
> + if (hyp_vm && hyp_vm->kvm.arch.pkvm.is_dying)
> + ret = __pkvm_host_reclaim_page_guest(gfn, hyp_vm);
> + hyp_spin_unlock(&vm_table_lock);
> +
> + return ret;
> +}
> +
> int __pkvm_start_teardown_vm(pkvm_handle_t handle)
> {
> struct pkvm_hyp_vm *hyp_vm;
> --
> 2.52.0.351.gbe84eed79e-goog
>