[PATCH 1/4] ARM: KVM: be more thorough when invalidating TLBs
Christoffer Dall
cdall at cs.columbia.edu
Tue Apr 30 13:17:17 EDT 2013
On Tue, Apr 30, 2013 at 7:17 AM, Marc Zyngier <marc.zyngier at arm.com> wrote:
> The KVM/ARM MMU code doesn't take care of invalidating TLBs before
> freeing a {pte,pmd} table. This could cause problems if the page
> is reallocated and then speculated into by another CPU.
>
> Reported-by: Catalin Marinas <catalin.marinas at arm.com>
> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> ---
> arch/arm/kvm/interrupts.S | 2 ++
> arch/arm/kvm/mmu.c | 36 +++++++++++++++++++++---------------
> 2 files changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index f7793df..9e2d906c 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -37,6 +37,8 @@ __kvm_hyp_code_start:
> *
> * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
> *
> + * Invalidate TLB for the given IPA, or invalidate all if ipa is zero.
> + *
> * We rely on the hardware to broadcast the TLB invalidation to all CPUs
> * inside the inner-shareable domain (which is the case for all v7
> * implementations). If we come across a non-IS SMP implementation, we'll
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 9657065..71f2a57 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -43,7 +43,8 @@ static phys_addr_t hyp_idmap_vector;
>
> static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
> {
> - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
> + if (kvm)
> + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
this feels slightly abusive? could we add a comment that hyp page
table freeing doesn't need TLB flushing from these functions and
doesn't have a struct kvm?
alternatively it may be more clear to have something like:
if (kvm) /* Check if Hyp or Stage-2 page table */
kvm_tlb_flush_vmid_ipa(kvm, addr);
in the callers, but, eh, maybe I'm overthinking this.
> }
>
> static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
> @@ -78,18 +79,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
> return p;
> }
>
> -static void clear_pud_entry(pud_t *pud)
> +static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
> {
> pmd_t *pmd_table = pmd_offset(pud, 0);
> pud_clear(pud);
> + kvm_tlb_flush_vmid_ipa(kvm, addr);
> pmd_free(NULL, pmd_table);
> put_page(virt_to_page(pud));
> }
>
> -static void clear_pmd_entry(pmd_t *pmd)
> +static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
> {
> pte_t *pte_table = pte_offset_kernel(pmd, 0);
> pmd_clear(pmd);
> + kvm_tlb_flush_vmid_ipa(kvm, addr);
> pte_free_kernel(NULL, pte_table);
> put_page(virt_to_page(pmd));
> }
> @@ -100,11 +103,12 @@ static bool pmd_empty(pmd_t *pmd)
> return page_count(pmd_page) == 1;
> }
>
> -static void clear_pte_entry(pte_t *pte)
> +static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
> {
> if (pte_present(*pte)) {
> kvm_set_pte(pte, __pte(0));
> put_page(virt_to_page(pte));
> + kvm_tlb_flush_vmid_ipa(kvm, addr);
I enjoyed the fact that it's perfectly correct to have this flush
after the put_page.
> }
> }
>
> @@ -114,7 +118,8 @@ static bool pte_empty(pte_t *pte)
> return page_count(pte_page) == 1;
> }
>
> -static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
> +static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
> + unsigned long long start, u64 size)
> {
> pgd_t *pgd;
> pud_t *pud;
> @@ -138,15 +143,15 @@ static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
> }
>
> pte = pte_offset_kernel(pmd, addr);
> - clear_pte_entry(pte);
> + clear_pte_entry(kvm, pte, addr);
> range = PAGE_SIZE;
>
> /* If we emptied the pte, walk back up the ladder */
> if (pte_empty(pte)) {
> - clear_pmd_entry(pmd);
> + clear_pmd_entry(kvm, pmd, addr);
> range = PMD_SIZE;
> if (pmd_empty(pmd)) {
> - clear_pud_entry(pud);
> + clear_pud_entry(kvm, pud, addr);
> range = PUD_SIZE;
> }
> }
> @@ -165,14 +170,14 @@ void free_boot_hyp_pgd(void)
> mutex_lock(&kvm_hyp_pgd_mutex);
>
> if (boot_hyp_pgd) {
> - unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
> - unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
> + unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
> + unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
> kfree(boot_hyp_pgd);
> boot_hyp_pgd = NULL;
> }
>
> if (hyp_pgd)
> - unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
> + unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
>
> kfree(init_bounce_page);
> init_bounce_page = NULL;
> @@ -200,9 +205,10 @@ void free_hyp_pgds(void)
>
> if (hyp_pgd) {
> for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
> - unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
> + unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
> for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
> - unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
> + unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
> +
> kfree(hyp_pgd);
> hyp_pgd = NULL;
> }
> @@ -393,7 +399,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
> */
> static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
> {
> - unmap_range(kvm->arch.pgd, start, size);
> + unmap_range(kvm, kvm->arch.pgd, start, size);
> }
>
> /**
> @@ -413,6 +419,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
> return;
>
> unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
> + kvm_tlb_flush_vmid_ipa(kvm, 0); /* Invalidate TLB ALL */
> free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
> kvm->arch.pgd = NULL;
> }
> @@ -675,7 +682,6 @@ static void handle_hva_to_gpa(struct kvm *kvm,
> static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
> {
> unmap_stage2_range(kvm, gpa, PAGE_SIZE);
> - kvm_tlb_flush_vmid_ipa(kvm, gpa);
> }
>
> int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
> --
> 1.8.2.1
>
>
looks good to me,
-Christoffer
More information about the linux-arm-kernel
mailing list