[RFC PATCH] arm64: KVM: honor cacheability attributes on S2 page fault
Anup Patel
anup at brainfault.org
Wed Sep 11 08:21:02 EDT 2013
On Tue, Sep 10, 2013 at 3:21 PM, Marc Zyngier <marc.zyngier at arm.com> wrote:
> Anup Patel recently brought up the fact that when we run a guest
> with cache disabled, we don't flush the cache to the Point of
> Coherency, hence possibly missing bits of data that have been
> written in the cache, but have not reached memory.
>
> There are several approaches to this issue:
> - Using the DC bit when caches are off: this breaks guests assuming
> caches off while doing DMA operations. Bootloaders, for example.
> - Fetch the memory attributes on translation fault, and flush the
> cache while handling the fault. This relies on using the PAR_EL1
> register to obtain the Stage-1 memory attributes.
>
> While this second solution is clearly not ideal (it duplicates what
> the HW already does to generate HPFAR_EL2), it is more correct than
> not doing anything.
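For context: this works because a successful AT S1E1R translation of the
faulting guest VA leaves the Stage-1 memory attributes (MAIR-style
encoding) in PAR_EL1[63:56], next to the output address. A minimal C
sketch of the fields involved (helper names are mine, not kernel code;
kernel-style u8/u64 types assumed):

#define PAR_EL1_F	(1ULL << 0)	/* translation failed */

/* Stage-1 memory attributes, MAIR-style encoding */
static inline u8 par_attr(u64 par)
{
	return par >> 56;
}

/* Output PA[47:12], only valid when PAR_EL1.F == 0 */
static inline u64 par_pa(u64 par)
{
	return par & 0x0000fffffffff000ULL;
}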
>
> Cc: Anup Patel <anup at brainfault.org>
> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> ---
>
> arch/arm/include/asm/kvm_mmu.h | 4 ++--
> arch/arm/kvm/mmu.c | 2 +-
> arch/arm64/include/asm/kvm_host.h | 1 +
> arch/arm64/include/asm/kvm_mmu.h | 9 +++++++--
> arch/arm64/kernel/asm-offsets.c | 1 +
> arch/arm64/kvm/hyp.S | 28 ++++++++++++----------------
> 6 files changed, 24 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 472ac70..faffdf0 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -105,7 +105,7 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
>
> struct kvm;
>
> -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
> +static inline void coherent_icache_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
> {
> /*
> * If we are going to insert an instruction page and the icache is
> @@ -120,7 +120,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
> * need any kind of flushing (DDI 0406C.b - Page B3-1392).
> */
> if (icache_is_pipt()) {
> - unsigned long hva = gfn_to_hva(kvm, gfn);
> + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
> __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
> } else if (!icache_is_vivt_asid_tagged()) {
> /* any kind of VIPT cache */
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 0988d9e..ffb3cc4 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -547,7 +547,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> return -EFAULT;
>
> new_pte = pfn_pte(pfn, PAGE_S2);
> - coherent_icache_guest_page(vcpu->kvm, gfn);
> + coherent_icache_guest_page(vcpu, gfn);
>
> spin_lock(&vcpu->kvm->mmu_lock);
> if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 331a852..2530f92 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -79,6 +79,7 @@ struct kvm_mmu_memory_cache {
> struct kvm_vcpu_fault_info {
> u32 esr_el2; /* Hyp Syndrome Register */
> u64 far_el2; /* Hyp Fault Address Register */
> + u64 par_el2; /* Result of FAR_EL2 translation */
Please rename this to par_el1, because we are only saving the result of
the Stage-1 translation.
> u64 hpfar_el2; /* Hyp IPA Fault Address Register */
> };
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index efe609c..84baddd 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -118,11 +118,16 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
>
> struct kvm;
>
> -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
> +static inline void coherent_icache_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
> {
> if (!icache_is_aliasing()) { /* PIPT */
> - unsigned long hva = gfn_to_hva(kvm, gfn);
> + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
> + u8 attr;
> flush_icache_range(hva, hva + PAGE_SIZE);
> + attr = vcpu->arch.fault.par_el2 >> 56;
> + /* Check for non-device, non-cacheable access */
> + if ((attr & 0xf0) && (attr & 0x0f) == 4)
> + __flush_dcache_area((void *)hva, PAGE_SIZE);
The condition is not complete: when the MMU is disabled, the memory
attribute is assumed to be 0x0 (i.e. Device-nGnRnE memory), so the
device case has to be covered as well. The condition check that works
on X-Gene and the Foundation v8 Model is:

+		/*
+		 * Check for device access OR
+		 * non-device, non-cacheable access
+		 */
+		if (!(attr & 0xf0) ||
+		    ((attr & 0xf0) && (attr & 0x0f) == 4))

Also, we should avoid bare integer constants here for better code
readability.
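For illustration, the same check with named constants could look like
this (a sketch only; the names below are mine and do not exist in the
kernel):

/* MAIR-style attribute byte as returned in PAR_EL1[63:56] */
#define S1_ATTR_OUTER_MASK	0xf0
#define S1_ATTR_INNER_MASK	0x0f
#define S1_ATTR_DEVICE		0x00	/* outer nibble 0 => Device memory */
#define S1_ATTR_INNER_NC	0x04	/* inner Normal Non-cacheable */

static inline bool s1_attr_needs_dcache_flush(u8 attr)
{
	/*
	 * Device access (with the MMU off, everything defaults to
	 * Device-nGnRnE, i.e. attr == 0)...
	 */
	if ((attr & S1_ATTR_OUTER_MASK) == S1_ATTR_DEVICE)
		return true;
	/* ...or Normal memory mapped inner Non-cacheable. */
	return (attr & S1_ATTR_INNER_MASK) == S1_ATTR_INNER_NC;
}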
> } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
> /* any kind of VIPT cache */
> __flush_icache_all();
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 1f3a39d..e67dcac 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -117,6 +117,7 @@ int main(void)
> DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
> DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
> DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
> + DEFINE(VCPU_PAR_EL2, offsetof(struct kvm_vcpu, arch.fault.par_el2));
> DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
> DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
> DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 20a58fe..417636b 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -671,24 +671,16 @@ el1_trap:
> ccmp x2, x0, #4, ne
> b.ne 1f // Not an abort we care about
>
> - /* This is an abort. Check for permission fault */
> - and x2, x1, #ESR_EL2_FSC_TYPE
> - cmp x2, #FSC_PERM
> - b.ne 1f // Not a permission fault
> -
> - /*
> - * Check for Stage-1 page table walk, which is guaranteed
> - * to give a valid HPFAR_EL2.
> - */
> - tbnz x1, #7, 1f // S1PTW is set
> -
> /* Preserve PAR_EL1 */
> mrs x3, par_el1
> push x3, xzr
>
> /*
> - * Permission fault, HPFAR_EL2 is invalid.
> - * Resolve the IPA the hard way using the guest VA.
> + * Two cases:
> + * - S2 translation fault, we need the memory attributes.
> + * - S2 permission fault, HPFAR_EL2 is invalid.
> + *
> + * In both cases, resolve the IPA the hard way using the guest VA.
> * Stage-1 translation already validated the memory access rights.
> * As such, we can use the EL1 translation regime, and don't have
> * to distinguish between EL0 and EL1 access.
> @@ -702,15 +694,19 @@ el1_trap:
> pop x0, xzr // Restore PAR_EL1 from the stack
> msr par_el1, x0
> tbnz x3, #0, 3f // Bail out if we failed the translation
> +
> + mrs x0, tpidr_el2
> + str x3, [x0, #VCPU_PAR_EL2]
> ubfx x3, x3, #12, #36 // Extract IPA
> lsl x3, x3, #4 // and present it like HPFAR
> +
> b 2f
>
> -1: mrs x3, hpfar_el2
> +1: mrs x0, tpidr_el2
> mrs x2, far_el2
> + mrs x3, hpfar_el2
>
> -2: mrs x0, tpidr_el2
> - str x1, [x0, #VCPU_ESR_EL2]
> +2: str x1, [x0, #VCPU_ESR_EL2]
> str x2, [x0, #VCPU_FAR_EL2]
> str x3, [x0, #VCPU_HPFAR_EL2]
>
> --
> 1.8.2.3
>
>
In general, the patch had formatting issues, so I had to apply the
changes manually.
This patch works on X-Gene and the Foundation v8 Model with the
above-mentioned modified condition check.
Tested-by: Anup Patel <anup at brainfault.org>
Thanks,
Anup