[PATCH v3 18/36] KVM: arm64: Inject SIGSEGV on illegal accesses
Fuad Tabba
tabba at google.com
Wed Mar 11 03:13:46 PDT 2026
On Thu, 5 Mar 2026 at 14:45, Will Deacon <will at kernel.org> wrote:
>
> From: Quentin Perret <qperret at google.com>
>
> The pKVM hypervisor will currently panic if the host tries to access
> memory that it doesn't own (e.g. protected guest memory). Sadly, as
> guest memory can still be mapped into the VMM's address space, userspace
> can trivially crash the kernel/hypervisor by poking into guest memory.
>
> To prevent this, inject the abort back into the host with S1PTW set in the
> ESR, hence allowing the host to differentiate this abort from normal
> userspace faults and inject a SIGSEGV cleanly.
>
> Signed-off-by: Quentin Perret <qperret at google.com>
> Signed-off-by: Will Deacon <will at kernel.org>
Reviewed-by: Fuad Tabba <tabba at google.com>
Cheers,
/fuad
> ---
> arch/arm64/kvm/hyp/nvhe/mem_protect.c | 37 +++++++++++++++++++++++++++
> arch/arm64/mm/fault.c | 22 ++++++++++++++++
> 2 files changed, 59 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 31b6a52e5e4c..0dc1d6fc546c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -18,6 +18,7 @@
> #include <nvhe/memory.h>
> #include <nvhe/mem_protect.h>
> #include <nvhe/mm.h>
> +#include <nvhe/trap_handler.h>
>
> #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)
>
> @@ -612,6 +613,39 @@ static int host_stage2_idmap(u64 addr)
> return ret;
> }
>
> +static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt)
> +{
> + u64 ec, esr, spsr;
> +
> + esr = read_sysreg_el2(SYS_ESR);
> + spsr = read_sysreg_el2(SYS_SPSR);
> +
> + /* Repaint the ESR to report a same-level fault if taken from EL1 */
> + if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
> + ec = ESR_ELx_EC(esr);
> + if (ec == ESR_ELx_EC_DABT_LOW)
> + ec = ESR_ELx_EC_DABT_CUR;
> + else if (ec == ESR_ELx_EC_IABT_LOW)
> + ec = ESR_ELx_EC_IABT_CUR;
> + else
> + WARN_ON(1);
> + esr &= ~ESR_ELx_EC_MASK;
> + esr |= ec << ESR_ELx_EC_SHIFT;
> + }
> +
> + /*
> + * Since S1PTW should only ever be set for stage-2 faults, we're pretty
> + * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
> + * let's use that bit to allow the host abort handler to differentiate
> + * this abort from normal userspace faults.
> + *
> + * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
> + * architecture to be backed by flops, so it should be safe to use.
> + */
> + esr |= ESR_ELx_S1PTW;
> + inject_host_exception(esr);
> +}
> +
> void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
> {
> struct kvm_vcpu_fault_info fault;
> @@ -635,6 +669,9 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
> addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
>
> switch (host_stage2_idmap(addr)) {
> + case -EPERM:
> + host_inject_mem_abort(host_ctxt);
> + fallthrough;
> case -EEXIST:
> case 0:
> break;
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index be9dab2c7d6a..3abfc7272d63 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -43,6 +43,7 @@
> #include <asm/system_misc.h>
> #include <asm/tlbflush.h>
> #include <asm/traps.h>
> +#include <asm/virt.h>
>
> struct fault_info {
> int (*fn)(unsigned long far, unsigned long esr,
> @@ -269,6 +270,15 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr
> return false;
> }
>
> +static bool is_pkvm_stage2_abort(unsigned int esr)
> +{
> + /*
> + * S1PTW should only ever be set in ESR_EL1 if the pkvm hypervisor
> + * injected a stage-2 abort -- see host_inject_mem_abort().
> + */
> + return is_pkvm_initialized() && (esr & ESR_ELx_S1PTW);
> +}
> +
> static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
> unsigned long esr,
> struct pt_regs *regs)
> @@ -279,6 +289,9 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
> if (!is_el1_data_abort(esr) || !esr_fsc_is_translation_fault(esr))
> return false;
>
> + if (is_pkvm_stage2_abort(esr))
> + return false;
> +
> local_irq_save(flags);
> asm volatile("at s1e1r, %0" :: "r" (addr));
> isb();
> @@ -395,6 +408,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
> msg = "read from unreadable memory";
> } else if (addr < PAGE_SIZE) {
> msg = "NULL pointer dereference";
> + } else if (is_pkvm_stage2_abort(esr)) {
> + msg = "access to hypervisor-protected memory";
> } else {
> if (esr_fsc_is_translation_fault(esr) &&
> kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
> @@ -621,6 +636,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> addr, esr, regs);
> }
>
> + if (is_pkvm_stage2_abort(esr)) {
> + if (!user_mode(regs))
> + goto no_context;
> + arm64_force_sig_fault(SIGSEGV, SEGV_ACCERR, far, "stage-2 fault");
> + return 0;
> + }
> +
> perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
>
> if (!(mm_flags & FAULT_FLAG_USER))
> --
> 2.53.0.473.g4a7958ca14-goog
>
More information about the linux-arm-kernel
mailing list