[RFC PATCH] arm64: KVM: honor cacheability attributes on S2 page fault
Marc Zyngier
marc.zyngier at arm.com
Wed Sep 11 08:35:56 EDT 2013
On 11/09/13 13:21, Anup Patel wrote:
> On Tue, Sep 10, 2013 at 3:21 PM, Marc Zyngier <marc.zyngier at arm.com> wrote:
>> Anup Patel recently brought up the fact that when we run a guest
>> with cache disabled, we don't flush the cache to the Point of
>> Coherency, hence possibly missing bits of data that have been
>> written in the cache, but have not reached memory.
>>
>> There are several approaches to this issue:
>> - Using the DC bit when caches are off: this breaks guests assuming
>> caches off while doing DMA operations. Bootloaders, for example.
>> - Fetch the memory attributes on translation fault, and flush the
>> cache while handling the fault. This relies on using the PAR_EL1
>> register to obtain the Stage-1 memory attributes.
>>
>> While this second solution is clearly not ideal (it duplicates what
>> the HW already does to generate HPFAR_EL2), it is more correct than
>> not doing anything.
>>
>> Cc: Anup Patel <anup at brainfault.org>
>> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
>> ---
>>
>> arch/arm/include/asm/kvm_mmu.h | 4 ++--
>> arch/arm/kvm/mmu.c | 2 +-
>> arch/arm64/include/asm/kvm_host.h | 1 +
>> arch/arm64/include/asm/kvm_mmu.h | 9 +++++++--
>> arch/arm64/kernel/asm-offsets.c | 1 +
>> arch/arm64/kvm/hyp.S | 28 ++++++++++++----------------
>> 6 files changed, 24 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
>> index 472ac70..faffdf0 100644
>> --- a/arch/arm/include/asm/kvm_mmu.h
>> +++ b/arch/arm/include/asm/kvm_mmu.h
>> @@ -105,7 +105,7 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
>>
>> struct kvm;
>>
>> -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
>> +static inline void coherent_icache_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
>> {
>> /*
>> * If we are going to insert an instruction page and the icache is
>> @@ -120,7 +120,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
>> * need any kind of flushing (DDI 0406C.b - Page B3-1392).
>> */
>> if (icache_is_pipt()) {
>> - unsigned long hva = gfn_to_hva(kvm, gfn);
>> + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
>> __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
>> } else if (!icache_is_vivt_asid_tagged()) {
>> /* any kind of VIPT cache */
>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>> index 0988d9e..ffb3cc4 100644
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -547,7 +547,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>> return -EFAULT;
>>
>> new_pte = pfn_pte(pfn, PAGE_S2);
>> - coherent_icache_guest_page(vcpu->kvm, gfn);
>> + coherent_icache_guest_page(vcpu, gfn);
>>
>> spin_lock(&vcpu->kvm->mmu_lock);
>> if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index 331a852..2530f92 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -79,6 +79,7 @@ struct kvm_mmu_memory_cache {
>> struct kvm_vcpu_fault_info {
>> u32 esr_el2; /* Hyp Syndrom Register */
>> u64 far_el2; /* Hyp Fault Address Register */
>> + u64 par_el2; /* Result of FAR_EL2 translation */
>
> Please rename this to par_el1 because we are saving result of Stage1
> translation only.
That'd be very confusing, as we deal with PAR_EL1 separately, as part of
the guest context.
If anything, I may rename it to "what_I_d_love_HPFAR_to_return" instead.
>> u64 hpfar_el2; /* Hyp IPA Fault Address Register */
>> };
>>
>> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
>> index efe609c..84baddd 100644
>> --- a/arch/arm64/include/asm/kvm_mmu.h
>> +++ b/arch/arm64/include/asm/kvm_mmu.h
>> @@ -118,11 +118,16 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
>>
>> struct kvm;
>>
>> -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
>> +static inline void coherent_icache_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
>> {
>> if (!icache_is_aliasing()) { /* PIPT */
>> - unsigned long hva = gfn_to_hva(kvm, gfn);
>> + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
>> + u8 attr;
>> flush_icache_range(hva, hva + PAGE_SIZE);
>> + attr = vcpu->arch.fault.par_el2 >> 56;
>> + /* Check for non-device, non-cacheable access */
>> + if ((attr & 0xf0) && (attr & 0x0f) == 4)
>> + __flush_dcache_area((void *)hva, PAGE_SIZE);
>
> The condition is not complete because when the MMU is disabled the memory
> attribute is assumed to be 0x0 (i.e. Device-nGnRnE memory).
That's a good point.
> The condition check that works on X-Gene and Foundation v8 Model is:
>
> + /* Check for device access OR
> + * non-device, non-cacheable access
> + */
> + if (!(attr & 0xf0) ||
> + ((attr & 0xf0) && (attr & 0x0f) == 4))
>
> Also, we should avoid integer constants here for better code readability.
>
>> } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
>> /* any kind of VIPT cache */
>> __flush_icache_all();
>> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
>> index 1f3a39d..e67dcac 100644
>> --- a/arch/arm64/kernel/asm-offsets.c
>> +++ b/arch/arm64/kernel/asm-offsets.c
>> @@ -117,6 +117,7 @@ int main(void)
>> DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
>> DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
>> DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
>> + DEFINE(VCPU_PAR_EL2, offsetof(struct kvm_vcpu, arch.fault.par_el2));
>> DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
>> DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
>> DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>> index 20a58fe..417636b 100644
>> --- a/arch/arm64/kvm/hyp.S
>> +++ b/arch/arm64/kvm/hyp.S
>> @@ -671,24 +671,16 @@ el1_trap:
>> ccmp x2, x0, #4, ne
>> b.ne 1f // Not an abort we care about
>>
>> - /* This is an abort. Check for permission fault */
>> - and x2, x1, #ESR_EL2_FSC_TYPE
>> - cmp x2, #FSC_PERM
>> - b.ne 1f // Not a permission fault
>> -
>> - /*
>> - * Check for Stage-1 page table walk, which is guaranteed
>> - * to give a valid HPFAR_EL2.
>> - */
>> - tbnz x1, #7, 1f // S1PTW is set
>> -
>> /* Preserve PAR_EL1 */
>> mrs x3, par_el1
>> push x3, xzr
>>
>> /*
>> - * Permission fault, HPFAR_EL2 is invalid.
>> - * Resolve the IPA the hard way using the guest VA.
>> + * Two cases:
>> + * - S2 translation fault, we need the memory attributes.
>> + * - S2 permission fault, HPFAR_EL2 is invalid.
>> + *
>> + * In both cases, resolve the IPA the hard way using the guest VA.
>> * Stage-1 translation already validated the memory access rights.
>> * As such, we can use the EL1 translation regime, and don't have
>> * to distinguish between EL0 and EL1 access.
>> @@ -702,15 +694,19 @@ el1_trap:
>> pop x0, xzr // Restore PAR_EL1 from the stack
>> msr par_el1, x0
>> tbnz x3, #0, 3f // Bail out if we failed the translation
>> +
>> + mrs x0, tpidr_el2
>> + str x3, [x0, #VCPU_PAR_EL2]
>> ubfx x3, x3, #12, #36 // Extract IPA
>> lsl x3, x3, #4 // and present it like HPFAR
>> +
>> b 2f
>>
>> -1: mrs x3, hpfar_el2
>> +1: mrs x0, tpidr_el2
>> mrs x2, far_el2
>> + mrs x3, hpfar_el2
>>
>> -2: mrs x0, tpidr_el2
>> - str x1, [x0, #VCPU_ESR_EL2]
>> +2: str x1, [x0, #VCPU_ESR_EL2]
>> str x2, [x0, #VCPU_FAR_EL2]
>> str x3, [x0, #VCPU_HPFAR_EL2]
>>
>> --
>> 1.8.2.3
>>
>>
>
> In general, the patch had formatting issues, so I had to apply the changes
> manually.
maz at e102391-lin:~/Work/arm-platforms$ git checkout v3.11
HEAD is now at 6e46645... Linux 3.11
maz at e102391-lin:~/Work/arm-platforms$ git am
tmp/0001-arm64-KVM-honor-cacheability-attributes-on-S2-page-f.patch
Applying: arm64: KVM: honor cacheability attributes on S2 page fault
maz at e102391-lin:~/Work/arm-platforms$
Works fine here.
> This patch works on X-Gene and Foundation v8 Model with above mentioned
> modified condition check.
>
> Tested-by: Anup Patel <anup at brainfault.org>
Thanks.
M.
--
Jazz is not dead. It just smells funny...
More information about the linux-arm-kernel
mailing list