From db24e10aa41a183b0b32e662246d6f693fa56a4f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Aug 2013 05:23:14 +0100 Subject: [PATCH] arm64: KVM: let the guest access cacheable memory when Stage-1 MMU is off When a guest boots, its Stage-1 MMU is off. This has the side effect of bypassing the caches, and hitting directly into RAM. If the system has a big enough cache, it is possible for the initial payload loaded by userspace to only sit in the cache, which the guest will fail to correctly fetch. One possibility would be to flush the dcache to Point of Coherency. Another idea is to use the HCR_DC (Default Cacheable) bit as long as the Stage-1 MMU is off. To detect this transition, it becomes necessary to trap access to the SCTLR_EL1 system register (using HCR_TVM, which traps write access to all VM-related registers). Once the Stage-1 MMU is enabled, we can safely disable the HCR_DC bit, as well as HCR_TVM. This ensures that the impact of this change is minimal, and limited to the guest early boot. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 4 ++- arch/arm64/kvm/sys_regs.c | 58 ++++++++++++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a5f28e2..a71a9bf 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -60,10 +60,12 @@ /* * The bits we set in HCR: * RW: 64bit by default, can be overriden for 32bit VMs + * TVM: Trap writes to VM registers (until MMU is on) * TAC: Trap ACTLR * TSC: Trap SMC * TSW: Trap cache operations by set/way * TWI: Trap WFI + * DC: Default Cacheable (until MMU is on) * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain * FB: Force broadcast of all maintainance operations @@ -74,7 +76,7 @@ */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_DC | HCR_TVM | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* Hyp System Control Register (SCTLR_EL2) bits */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9492360..6d81d87 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -121,6 +121,42 @@ done: } /* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + BUG_ON(!p->is_write); + + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + return true; +} + +/* + * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + */ +static bool access_sctlr_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + + BUG_ON(!p->is_write); + + val = *vcpu_reg(vcpu, p->Rt); + vcpu_sys_reg(vcpu, r->reg) = val; + + if (val & (1 << 0)) /* MMU Enabled? */ + vcpu->arch.hcr_el2 &= ~(HCR_DC | HCR_TVM); + + return true; +} + +/* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was * NAKed, so it will read the PMCR anyway. @@ -185,32 +221,32 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_mpidr, MPIDR_EL1 }, /* SCTLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + access_sctlr_el1, reset_val, SCTLR_EL1, 0x00C50078 }, /* CPACR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), NULL, reset_val, CPACR_EL1, 0 }, /* TTBR0_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, TTBR0_EL1 }, + access_vm_reg, reset_unknown, TTBR0_EL1 }, /* TTBR1_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), - NULL, reset_unknown, TTBR1_EL1 }, + access_vm_reg, reset_unknown, TTBR1_EL1 }, /* TCR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), - NULL, reset_val, TCR_EL1, 0 }, + access_vm_reg, reset_val, TCR_EL1, 0 }, /* AFSR0_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), - NULL, reset_unknown, AFSR0_EL1 }, + access_vm_reg, reset_unknown, AFSR0_EL1 }, /* AFSR1_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), - NULL, reset_unknown, AFSR1_EL1 }, + access_vm_reg, reset_unknown, AFSR1_EL1 }, /* ESR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), - NULL, reset_unknown, ESR_EL1 }, + access_vm_reg, reset_unknown, ESR_EL1 }, /* FAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, FAR_EL1 }, + access_vm_reg, reset_unknown, FAR_EL1 }, /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), @@ -221,17 +257,17 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), - NULL, reset_unknown, MAIR_EL1 }, + access_vm_reg, reset_unknown, MAIR_EL1 }, /* AMAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), - NULL, reset_amair_el1, AMAIR_EL1 }, + access_vm_reg, reset_amair_el1, AMAIR_EL1 }, /* VBAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), NULL, reset_val, VBAR_EL1, 0 }, /* CONTEXTIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), - NULL, reset_val, CONTEXTIDR_EL1, 0 }, + access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, /* TPIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), NULL, reset_unknown, TPIDR_EL1 }, -- 1.8.1.5