[PATCH v5 69/69] KVM: arm64: nv: Fast-track EL1 TLBIs for VHE guests
Marc Zyngier
maz at kernel.org
Mon Nov 29 12:01:50 PST 2021
Due to the way ARMv8.4-NV suppresses traps when accessing EL2
system registers, we can't track when the guest changes its
HCR_EL2.TGE setting. This means we always trap EL1 TLBIs,
even if they don't affect any guest.

This obviously has a huge impact on performance, as we handle
TLBI traps as a normal exit, and a normal VHE host issues
thousands of TLBIs when booting (and quite a few when running
userspace).

A cheap way to reduce the overhead is to handle the limited
case of {E2H,TGE}=={1,1} as a guest fixup, as we already have
the right mmu configuration in place. Just execute the decoded
instruction right away and return to the guest.

Signed-off-by: Marc Zyngier <maz at kernel.org>
---
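Note (not part of the patch): the fast path below only needs to classify
the trapped SYS64 instruction as an EL1 TLBI, i.e. check that the decoded
encoding has Op0 == 1 and Op1 == 0. Here is a minimal, standalone sketch of
that classification; OP0_SHIFT/OP1_SHIFT/SYS_INSN are illustrative stand-ins
for the kernel's Op0_shift/Op1_shift/sys_insn() definitions, and the field
layout is assumed to follow the kernel's sys_reg() encoding.

  /* Illustrative only: not kernel code. */
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Field shifts, assumed to match the kernel's sys_reg() encoding. */
  #define OP0_SHIFT	19
  #define OP1_SHIFT	16
  #define CRN_SHIFT	12
  #define CRM_SHIFT	8
  #define OP2_SHIFT	5

  #define SYS_INSN(op0, op1, crn, crm, op2)			\
  	(((op0) << OP0_SHIFT) | ((op1) << OP1_SHIFT) |		\
  	 ((crn) << CRN_SHIFT) | ((crm) << CRM_SHIFT) |		\
  	 ((op2) << OP2_SHIFT))

  /* EL1 TLBI system instructions encode as Op0 == 1, Op1 == 0. */
  static bool is_el1_tlbi(uint32_t instr)
  {
  	uint32_t op0 = (instr >> OP0_SHIFT) & 0x3;
  	uint32_t op1 = (instr >> OP1_SHIFT) & 0x7;

  	return op0 == 1 && op1 == 0;
  }

  int main(void)
  {
  	uint32_t tlbi_vae1is = SYS_INSN(1, 0, 8, 3, 1);	/* TLBI VAE1IS */
  	uint32_t sctlr_el1   = SYS_INSN(3, 0, 1, 0, 0);	/* SCTLR_EL1 access */

  	printf("TLBI VAE1IS -> %d, SCTLR_EL1 -> %d\n",
  	       is_el1_tlbi(tlbi_vae1is), is_el1_tlbi(sctlr_el1));
  	return 0;
  }

Running it prints "TLBI VAE1IS -> 1, SCTLR_EL1 -> 0", matching the
sys_reg_Op0()/sys_reg_Op1() test used in kvm_hyp_handle_tlbi_el1() below.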
 arch/arm64/kvm/hyp/vhe/switch.c | 43 ++++++++++++++++++++++++++++++++-
 arch/arm64/kvm/hyp/vhe/tlb.c    |  6 +++--
 arch/arm64/kvm/sys_regs.c       | 25 ++++++-------------
3 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6cbe6a89dbdb..fa2fd7e911b2 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -162,6 +162,47 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
__deactivate_traps_common(vcpu);
}
+static bool kvm_hyp_handle_tlbi_el1(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u32 instr;
+ u64 val;
+
+ /*
+ * Ideally, we would never trap on EL1 TLB invalidations when the
+ * guest's HCR_EL2.{E2H,TGE} == {1,1}. But "thanks" to ARMv8.4, we
+ * don't trap writes to HCR_EL2, meaning that we can't track
+ * changes to the virtual TGE bit. So we leave HCR_EL2.TTLB set on
+ * the host. Oopsie...
+ *
+ * In order to speed up EL1 TLBIs from the vEL2 guest when TGE is
+ * set, try to handle these invalidations as quickly as possible,
+ * without fully exiting (unless this needs forwarding).
+ */
+ if (!enhanced_nested_virt_in_use(vcpu) ||
+ !vcpu_mode_el2(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE))
+ return false;
+
+ instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ if (sys_reg_Op0(instr) != TLBI_Op0 ||
+ sys_reg_Op1(instr) != TLBI_Op1_EL1)
+ return false;
+
+ val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+ __kvm_tlb_el1_instr(NULL, val, instr);
+ __kvm_skip_instr(vcpu);
+
+ return true;
+}
+
+static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (kvm_hyp_handle_tlbi_el1(vcpu, exit_code))
+ return true;
+
+ return kvm_hyp_handle_sysreg(vcpu, exit_code);
+}
+
static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
@@ -210,7 +251,7 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
- [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg_vhe,
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index c4389db4cc22..beb162468c0b 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -201,7 +201,8 @@ void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ if (mmu)
+ __tlb_switch_to_guest(mmu, &cxt);
/*
* Execute the same instruction as the guest hypervisor did,
@@ -240,5 +241,6 @@ void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ if (mmu)
+ __tlb_switch_to_host(&cxt);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e8ab052be122..acfd3c72faf6 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2804,6 +2804,8 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+ u64 virtual_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ struct kvm_s2_mmu *mmu;
/*
* If we're here, this is because we've trapped on a EL1 TLBI
@@ -2822,24 +2824,13 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
mutex_lock(&vcpu->kvm->lock);
- if ((__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
- u64 virtual_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
- struct kvm_s2_mmu *mmu;
-
- mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, HCR_VM);
- if (mmu)
- __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
+ mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, HCR_VM);
+ if (mmu)
+ __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
- mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, 0);
- if (mmu)
- __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
- } else {
- /*
- * ARMv8.4-NV allows the guest to change TGE behind
- * our back, so we always trap EL1 TLBIs from vEL2...
- */
- __kvm_tlb_el1_instr(&vcpu->kvm->arch.mmu, p->regval, sys_encoding);
- }
+ mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, 0);
+ if (mmu)
+ __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
mutex_unlock(&vcpu->kvm->lock);
--
2.30.2