[PATCH 03/14] KVM: arm64: Support host MMIO trap handlers for unmapped devices

Fuad Tabba tabba at google.com
Fri Mar 13 02:31:26 PDT 2026


Hi Sebastian,

On Tue, 10 Mar 2026 at 12:49, Sebastian Ene <sebastianene at google.com> wrote:
>
> Introduce a mechanism to register callbacks for MMIO accesses to regions
> unmapped from the host Stage-2 page tables.
>
> This infrastructure allows the hypervisor to intercept host accesses to
> protected or emulated devices. When a Stage-2 fault occurs on a
> registered device region, the hypervisor will invoke the associated
> callback to emulate the access.
>
> Signed-off-by: Sebastian Ene <sebastianene at google.com>
> ---
>  arch/arm64/include/asm/kvm_arm.h      |  3 ++
>  arch/arm64/include/asm/kvm_pkvm.h     |  6 ++++
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 41 +++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp/nvhe/setup.c       |  3 ++
>  4 files changed, 53 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 3f9233b5a130..8fe1e80ab3f4 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -304,6 +304,9 @@
>
>  /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
>  #define HPFAR_MASK     (~UL(0xf))
> +
> +#define FAR_MASK       GENMASK_ULL(11, 0)
> +
>  /*
>   * We have
>   *     PAR     [PA_Shift - 1   : 12] = PA      [PA_Shift - 1 : 12]
> diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> index 48ec7d519399..5321ced2f50a 100644
> --- a/arch/arm64/include/asm/kvm_pkvm.h
> +++ b/arch/arm64/include/asm/kvm_pkvm.h
> @@ -19,9 +19,15 @@
>
>  #define PKVM_PROTECTED_REGS_NUM        8
>
> +struct pkvm_protected_reg;
> +
> +typedef void (pkvm_emulate_handler)(struct pkvm_protected_reg *region, u64 offset, bool write,
> +                                   u64 *reg, u8 reg_size);
> +
>  struct pkvm_protected_reg {
>         u64 start_pfn;
>         size_t num_pages;
> +       pkvm_emulate_handler *cb;
>  };
>
>  extern struct pkvm_protected_reg kvm_nvhe_sym(pkvm_protected_regs)[];
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 7c125836b533..f405d2fbd88f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -13,6 +13,7 @@
>  #include <asm/stage2_pgtable.h>
>
>  #include <hyp/fault.h>
> +#include <hyp/adjust_pc.h>

Please sort includes alphabetically.
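I.e., something like this, assuming nothing here relies on the current
ordering:

+#include <hyp/adjust_pc.h>
 #include <hyp/fault.h>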

>
>  #include <nvhe/gfp.h>
>  #include <nvhe/memory.h>
> @@ -608,6 +609,41 @@ static int host_stage2_idmap(u64 addr)
>         return ret;
>  }
>
> +static bool handle_host_mmio_trap(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
> +{
> +       u64 offset, reg_value = 0, start, end;
> +       u8 reg_size, reg_index;
> +       bool write;
> +       int i;

What do you plan to do if there is no valid syndrome, i.e.,
ESR_EL2.ISV == 0? I am still reviewing, so maybe this is solved in a
future patch, or maybe you know that, in practice, all instructions
would have a valid syndrome. Regardless of which it is, you should
definitely add the following check to _this_ patch (or reconsider the
approach if it is possible to get legit accesses with ESR_EL2.ISV ==
0):

+      if (!(esr & ESR_ELx_ISV))
+              return false;

> +
> +       for (i = 0; i < num_protected_reg; i++) {
> +               start = pkvm_protected_regs[i].start_pfn << PAGE_SHIFT;
> +               end = start + (pkvm_protected_regs[i].num_pages << PAGE_SHIFT);
> +
> +               if (start > addr || addr > end)

Because end is calculated by adding the size, it represents the first
byte after the region, so this should be:
+               if (start > addr || addr >= end)
> +                       continue;

You also need to make sure that the entire access fits within the
protected region, to avoid a malicious or misaligned access that crosses
the region boundary, i.e. (once reg_size has been computed below):

+                if (addr + reg_size > end)
+                        return false;


> +               reg_size = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
> +               reg_index = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
> +               write = (esr & ESR_ELx_WNR) == ESR_ELx_WNR;
> +               offset = addr - start;
> +
> +               if (write)
> +                       reg_value = host_ctxt->regs.regs[reg_index];

You need to handle the zero register (index 31) for writes, e.g.:
+                       reg_value = (reg_index == 31) ? 0 :
+                                   host_ctxt->regs.regs[reg_index];

> +
> +               pkvm_protected_regs[i].cb(&pkvm_protected_regs[i], offset, write,
> +                                         &reg_value, reg_size);
> +
> +               if (!write)
> +                       host_ctxt->regs.regs[reg_index] = reg_value;

and for reads:
+               if (!write && reg_index != 31)
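
Putting the above together, a rough (untested) sketch of what I have in
mind for the handler, with the ESR decoding hoisted out of the loop so
reg_size is available for the bounds check:

static bool handle_host_mmio_trap(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
{
	u64 offset, reg_value = 0, start, end;
	u8 reg_size, reg_index;
	bool write;
	int i;

	if (!(esr & ESR_ELx_ISV))
		return false;

	reg_size = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
	reg_index = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
	write = (esr & ESR_ELx_WNR) == ESR_ELx_WNR;

	for (i = 0; i < num_protected_reg; i++) {
		start = pkvm_protected_regs[i].start_pfn << PAGE_SHIFT;
		end = start + (pkvm_protected_regs[i].num_pages << PAGE_SHIFT);

		if (addr < start || addr >= end)
			continue;

		/* The whole access must fit within the region. */
		if (addr + reg_size > end)
			return false;

		offset = addr - start;

		/* Index 31 is the zero register for these encodings. */
		if (write)
			reg_value = (reg_index == 31) ? 0 : host_ctxt->regs.regs[reg_index];

		pkvm_protected_regs[i].cb(&pkvm_protected_regs[i], offset, write,
					  &reg_value, reg_size);

		if (!write && reg_index != 31)
			host_ctxt->regs.regs[reg_index] = reg_value;

		kvm_skip_host_instr();
		return true;
	}

	return false;
}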

Cheers,
/fuad

> +
> +               kvm_skip_host_instr();
> +               return true;
> +       }
> +
> +       return false;
> +}
> +
>  void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>  {
>         struct kvm_vcpu_fault_info fault;
> @@ -630,6 +666,11 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>          */
>         BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
>         addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
> +       addr |= fault.far_el2 & FAR_MASK;
> +
> +       if (ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW && !addr_is_memory(addr) &&
> +           handle_host_mmio_trap(host_ctxt, esr, addr))
> +               return;
>
>         ret = host_stage2_idmap(addr);
>         BUG_ON(ret && ret != -EAGAIN);
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index ad5b96085e1b..f91dfebe9980 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -296,6 +296,9 @@ static int unmap_protected_regions(void)
>                         if (ret)
>                                 goto err_setup;
>                 }
> +
> +               if (reg->cb)
> +                       reg->cb = kern_hyp_va(reg->cb);
>         }
>
>         return 0;
> --
> 2.53.0.473.g4a7958ca14-goog
>


