[kvmarm] [PATCH v3 06/14] KVM: ARM: Memory virtualization setup

Christoffer Dall c.dall at virtualopensystems.com
Mon Nov 19 09:42:42 EST 2012


On Mon, Nov 19, 2012 at 6:29 AM, Sundaram, Senthilkumar
<ssundara at qti.qualcomm.com> wrote:
> What is the use of MMU Notifiers in the absence of Shadow Page Table?
>
> Thanks
> Senthil

MMU notifiers are used to manage the stage-2 page tables in the event
of swapping or KSM. Please don't top-post to this list.
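
For illustration: when the host swaps out (or KSM merges) a page backing
guest memory, the generic notifier glue in virt/kvm/kvm_main.c calls down
into the arch hook added by this series, which tears down the stage-2
mapping. A simplified sketch of the generic side (locking and SRCU
elided):

    static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
                                                 struct mm_struct *mm,
                                                 unsigned long address)
    {
            struct kvm *kvm = mmu_notifier_to_kvm(mn);

            /*
             * Ends up in kvm_unmap_hva(), which clears the stage-2
             * PTE and flushes the TLB for that VMID.
             */
            kvm_unmap_hva(kvm, address);
    }

The guest then simply takes a stage-2 fault the next time it touches the
page, and KVM maps in the new copy.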

-Christoffer


>> -----Original Message-----
>> From: kvmarm-bounces at lists.cs.columbia.edu [mailto:kvmarm-
>> bounces at lists.cs.columbia.edu] On Behalf Of Christoffer Dall
>> Sent: Monday, October 22, 2012 12:20 PM
>> To: kvm at vger.kernel.org; linux-arm-kernel at lists.infradead.org;
>> kvmarm at lists.cs.columbia.edu
>> Cc: Marcelo Tosatti
>> Subject: [kvmarm] [PATCH v3 06/14] KVM: ARM: Memory virtualization setup
>>
>> This commit introduces the framework for guest memory management
>> through the use of 2nd stage translation. Each VM has a pointer to a level-1
>> table (the pgd field in struct kvm_arch) which is used for the 2nd stage
>> translations. Entries are added when handling guest faults (later patch) and
>> the table itself can be allocated and freed through the following functions
>> implemented in arch/arm/kvm/mmu.c:
>>  - kvm_alloc_stage2_pgd(struct kvm *kvm);
>>  - kvm_free_stage2_pgd(struct kvm *kvm);
>>
>> Each entry in the TLBs and caches is tagged with a VMID identifier in
>> addition to ASIDs. The VMIDs are assigned consecutively to VMs in the
>> order that VMs are executed, and caches and TLBs are invalidated when
>> the VMID space has been exhausted, allowing for more than 255
>> simultaneously running guests.
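>>
>> The generation check happens on every VM entry; the rollover logic
>> itself arrives in a later patch of the series, but the idea is roughly
>> the following sketch (function and helper names here are illustrative):
>>
>>   static void update_vmid(struct kvm *kvm)
>>   {
>>         /* fast path: our VMID is still part of the current generation */
>>         if (likely(kvm->arch.vmid_gen == atomic64_read(&kvm_vmid_gen)))
>>                 return;
>>
>>         spin_lock(&kvm_vmid_lock);
>>         if (unlikely(kvm_next_vmid == 0)) {
>>                 /* 8-bit VMID space exhausted: start a new generation,
>>                  * invalidating every outstanding VMID, and flush the
>>                  * TLBs and I-caches on all CPUs (__kvm_flush_vm_context) */
>>                 atomic64_inc(&kvm_vmid_gen);
>>                 kvm_next_vmid = 1;
>>                 flush_all_vm_contexts();        /* illustrative helper */
>>         }
>>         kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
>>         kvm->arch.vmid = kvm_next_vmid++;
>>         spin_unlock(&kvm_vmid_lock);
>>   }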
>>
>> The 2nd stage pgd is allocated in kvm_arch_init_vm(). The table is freed in
>> kvm_arch_destroy_vm(). Both functions are called from the main KVM code.
>>
>> We pre-allocate page table memory to be able to synchronize using a
>> spinlock and to be called under rcu_read_lock from the MMU notifiers.
>> We steal the mmu_memory_cache implementation from x86 and adapt it to
>> our specific usage.
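>>
>> The resulting pattern for callers is: top up the cache while we may
>> still sleep, then take mmu_lock and let the table-walking code pull
>> pre-allocated pages out of the cache. A minimal sketch of a fault-path
>> caller (the real fault handler comes in a later patch), using only the
>> functions this patch adds; kvm_phys_addr_ioremap() below follows the
>> same pattern:
>>
>>   ret = mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, 2, 2);
>>   if (ret)                        /* may sleep, called without locks */
>>           return ret;
>>   spin_lock(&kvm->mmu_lock);
>>   ret = stage2_set_pte(kvm, &vcpu->arch.mmu_page_cache, addr, &pte, false);
>>   spin_unlock(&kvm->mmu_lock);    /* stage2_set_pte never sleeps */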
>>
>> We support MMU notifiers (thanks to Marc Zyngier) through
>> kvm_unmap_hva and kvm_set_spte_hva.
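>>
>> kvm_set_spte_hva is driven by the generic ->change_pte notifier, e.g.
>> when KSM substitutes a shared page. A simplified sketch of the generic
>> caller in virt/kvm/kvm_main.c (not part of this patch):
>>
>>   static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
>>                                           struct mm_struct *mm,
>>                                           unsigned long address, pte_t pte)
>>   {
>>         struct kvm *kvm = mmu_notifier_to_kvm(mn);
>>
>>         spin_lock(&kvm->mmu_lock);
>>         kvm_set_spte_hva(kvm, address, pte);    /* arch hook below */
>>         spin_unlock(&kvm->mmu_lock);
>>   }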
>>
>> Finally, define kvm_phys_addr_ioremap() to map a device at a guest IPA,
>> which is used by VGIC support to map the virtual CPU interface registers to
>> the guest. This support is added by Marc Zyngier.
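>>
>> A VGIC init function can then install the whole mapping with a single
>> call, along these lines (hypothetical call site; the base and size
>> names are placeholders):
>>
>>   /* map the GIC virtual CPU interface (GICV) at the IPA where the
>>    * guest expects to find its GIC CPU interface */
>>   ret = kvm_phys_addr_ioremap(kvm, vgic_cpu_base /* guest IPA */,
>>                               gicv_phys_base /* host PA */, SZ_4K);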
>>
>> Reviewed-by: Marcelo Tosatti <mtosatti at redhat.com>
>> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
>> Signed-off-by: Christoffer Dall <c.dall at virtualopensystems.com>
>> ---
>>  arch/arm/include/asm/kvm_asm.h  |    2
>>  arch/arm/include/asm/kvm_host.h |   19 ++
>>  arch/arm/include/asm/kvm_mmu.h  |    9 +
>>  arch/arm/kvm/Kconfig            |    1
>>  arch/arm/kvm/arm.c              |   37 ++++
>>  arch/arm/kvm/interrupts.S       |   10 +
>>  arch/arm/kvm/mmu.c              |  393 +++++++++++++++++++++++++++++++++++++++
>>  arch/arm/kvm/trace.h            |   46 +++++
>>  8 files changed, 515 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
>> index 954bf7c..47a0e57 100644
>> --- a/arch/arm/include/asm/kvm_asm.h
>> +++ b/arch/arm/include/asm/kvm_asm.h
>> @@ -57,6 +57,7 @@
>>  #define ARM_EXCEPTION_HVC      7
>>
>>  #ifndef __ASSEMBLY__
>> +struct kvm;
>>  struct kvm_vcpu;
>>
>>  extern char __kvm_hyp_init[];
>> @@ -71,6 +72,7 @@ extern char __kvm_hyp_code_start[];
>>  extern char __kvm_hyp_code_end[];
>>
>>  extern void __kvm_flush_vm_context(void);
>> +extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
>>
>>  extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>>  #endif
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index 15d4c0b..68d1005 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -117,4 +117,23 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
>>  struct kvm_one_reg;
>>  int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
>>  int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
>> +u64 kvm_call_hyp(void *hypfn, ...);
>> +
>> +#define KVM_ARCH_WANT_MMU_NOTIFIER
>> +struct kvm;
>> +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>> +int kvm_unmap_hva_range(struct kvm *kvm,
>> +                     unsigned long start, unsigned long end);
>> +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
>> +
>> +/* We do not have shadow page tables, hence the empty hooks */
>> +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
>> +{
>> +     return 0;
>> +}
>>  #endif /* __ARM_KVM_HOST_H__ */
>> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
>> index 741ab8f..9bd0508 100644
>> --- a/arch/arm/include/asm/kvm_mmu.h
>> +++ b/arch/arm/include/asm/kvm_mmu.h
>> @@ -33,6 +33,15 @@ int create_hyp_mappings(void *from, void *to);
>>  int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
>>  void free_hyp_pmds(void);
>>
>> +int kvm_alloc_stage2_pgd(struct kvm *kvm);
>> +void kvm_free_stage2_pgd(struct kvm *kvm);
>> +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>> +                       phys_addr_t pa, unsigned long size);
>> +
>> +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
>> +
>> +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
>> +
>>  unsigned long kvm_mmu_get_httbr(void);
>>  int kvm_mmu_init(void);
>>  void kvm_mmu_exit(void);
>> diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
>> index a07ddcc..47c5500 100644
>> --- a/arch/arm/kvm/Kconfig
>> +++ b/arch/arm/kvm/Kconfig
>> @@ -36,6 +36,7 @@ config KVM_ARM_HOST
>>       depends on KVM
>>       depends on MMU
>>       depends on CPU_V7 && ARM_VIRT_EXT
>> +     select  MMU_NOTIFIER
>>       ---help---
>>         Provides host support for ARM processors.
>>
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 8e1ea2b..5ac3132 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -81,12 +81,33 @@ void kvm_arch_sync_events(struct kvm *kvm)
>>  {
>>  }
>>
>> +/**
>> + * kvm_arch_init_vm - initializes a VM data structure
>> + * @kvm:     pointer to the KVM struct
>> + */
>>  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>  {
>> +     int ret = 0;
>> +
>>       if (type)
>>               return -EINVAL;
>>
>> -     return 0;
>> +     ret = kvm_alloc_stage2_pgd(kvm);
>> +     if (ret)
>> +             goto out_fail_alloc;
>> +
>> +     ret = create_hyp_mappings(kvm, kvm + 1);
>> +     if (ret)
>> +             goto out_free_stage2_pgd;
>> +
>> +     /* Mark the initial VMID generation invalid */
>> +     kvm->arch.vmid_gen = 0;
>> +
>> +     return ret;
>> +out_free_stage2_pgd:
>> +     kvm_free_stage2_pgd(kvm);
>> +out_fail_alloc:
>> +     return ret;
>>  }
>>
>>  int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
>> @@ -104,10 +125,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
>>       return 0;
>>  }
>>
>> +/**
>> + * kvm_arch_destroy_vm - destroy the VM data structure
>> + * @kvm:     pointer to the KVM struct
>> + */
>>  void kvm_arch_destroy_vm(struct kvm *kvm)
>>  {
>>       int i;
>>
>> +     kvm_free_stage2_pgd(kvm);
>> +
>>       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>               if (kvm->vcpus[i]) {
>>                       kvm_arch_vcpu_free(kvm->vcpus[i]);
>> @@ -189,7 +216,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
>>       if (err)
>>               goto free_vcpu;
>>
>> +     err = create_hyp_mappings(vcpu, vcpu + 1);
>> +     if (err)
>> +             goto vcpu_uninit;
>> +
>>       return vcpu;
>> +vcpu_uninit:
>> +     kvm_vcpu_uninit(vcpu);
>>  free_vcpu:
>>       kmem_cache_free(kvm_vcpu_cache, vcpu);
>>  out:
>> @@ -198,6 +231,8 @@ out:
>>
>>  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>>  {
>> +     kvm_mmu_free_memory_caches(vcpu);
>> +     kmem_cache_free(kvm_vcpu_cache, vcpu);
>>  }
>>
>>  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
>> index 98a67ca..1c83022 100644
>> --- a/arch/arm/kvm/interrupts.S
>> +++ b/arch/arm/kvm/interrupts.S
>> @@ -33,7 +33,13 @@ __kvm_hyp_code_start:
>>
>> /********************************************************************
>>  * Flush per-VMID TLBs
>>  */
>> +ENTRY(__kvm_tlb_flush_vmid)
>> +     bx      lr
>> +ENDPROC(__kvm_tlb_flush_vmid)
>>
>> +/*********************************************************
>> ***********
>> + * Flush TLBs and instruction caches of current CPU for all VMIDs  */
>>  ENTRY(__kvm_flush_vm_context)
>>       bx      lr
>>  ENDPROC(__kvm_flush_vm_context)
>> @@ -41,10 +47,12 @@ ENDPROC(__kvm_flush_vm_context)
>>
>> /********************************************************************
>>  *  Hypervisor world-switch code
>>  */
>> -
>>  ENTRY(__kvm_vcpu_run)
>>       bx      lr
>>
>> +ENTRY(kvm_call_hyp)
>> +     bx      lr
>> +
>>
>>
>> /********************************************************************
>>  * Hypervisor exception vector and handlers
>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>> index 17c2bf5..f45be86 100644
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -23,11 +23,52 @@
>>  #include <asm/pgalloc.h>
>>  #include <asm/kvm_arm.h>
>>  #include <asm/kvm_mmu.h>
>> +#include <asm/kvm_asm.h>
>>  #include <asm/mach/map.h>
>> +#include <trace/events/kvm.h>
>> +
>> +#include "trace.h"
>>
>>  static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
>>  static pgd_t *hyp_pgd;
>>
>> +static void kvm_tlb_flush_vmid(struct kvm *kvm)
>> +{
>> +     kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
>> +}
>> +
>> +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
>> +                                  int min, int max)
>> +{
>> +     void *page;
>> +
>> +     BUG_ON(max > KVM_NR_MEM_OBJS);
>> +     if (cache->nobjs >= min)
>> +             return 0;
>> +     while (cache->nobjs < max) {
>> +             page = (void *)__get_free_page(PGALLOC_GFP);
>> +             if (!page)
>> +                     return -ENOMEM;
>> +             cache->objects[cache->nobjs++] = page;
>> +     }
>> +     return 0;
>> +}
>> +
>> +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
>> +{
>> +     while (mc->nobjs)
>> +             free_page((unsigned long)mc->objects[--mc->nobjs]);
>> +}
>> +
>> +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
>> +{
>> +     void *p;
>> +
>> +     BUG_ON(!mc || !mc->nobjs);
>> +     p = mc->objects[--mc->nobjs];
>> +     return p;
>> +}
>> +
>>  static void free_ptes(pmd_t *pmd, unsigned long addr)
>>  {
>>       pte_t *pte;
>> @@ -201,11 +242,363 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
>>       return __create_hyp_mappings(from, to, &pfn);
>>  }
>>
>> +/**
>> + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
>> + * @kvm:     The KVM struct pointer for the VM.
>> + *
>> + * Allocates the 1st level table only of size defined by PGD2_ORDER (can
>> + * support either full 40-bit input addresses or limited to 32-bit input
>> + * addresses). Clears the allocated pages.
>> + *
>> + * Note we don't need locking here as this is only called when the VM is
>> + * created, which can only be done once.
>> + */
>> +int kvm_alloc_stage2_pgd(struct kvm *kvm)
>> +{
>> +     pgd_t *pgd;
>> +
>> +     if (kvm->arch.pgd != NULL) {
>> +             kvm_err("kvm_arch already initialized?\n");
>> +             return -EINVAL;
>> +     }
>> +
>> +     pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
>> +     if (!pgd)
>> +             return -ENOMEM;
>> +
>> +     memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
>> +     clean_dcache_area(pgd, PTRS_PER_PGD2 * sizeof(pgd_t));
>> +     kvm->arch.pgd = pgd;
>> +
>> +     return 0;
>> +}
>> +
>> +static void free_guest_pages(pte_t *pte, unsigned long addr)
>> +{
>> +     unsigned int i;
>> +     struct page *pte_page;
>> +
>> +     pte_page = virt_to_page(pte);
>> +
>> +     for (i = 0; i < PTRS_PER_PTE; i++) {
>> +             if (pte_present(*pte))
>> +                     put_page(pte_page);
>> +             pte++;
>> +     }
>> +
>> +     WARN_ON(page_count(pte_page) != 1);
>> +}
>> +
>> +static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
>> +{
>> +     unsigned int i;
>> +     pte_t *pte;
>> +     struct page *pmd_page;
>> +
>> +     pmd_page = virt_to_page(pmd);
>> +
>> +     for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
>> +             BUG_ON(pmd_sect(*pmd));
>> +             if (!pmd_none(*pmd) && pmd_table(*pmd)) {
>> +                     pte = pte_offset_kernel(pmd, addr);
>> +                     free_guest_pages(pte, addr);
>> +                     pte_free_kernel(NULL, pte);
>> +
>> +                     put_page(pmd_page);
>> +             }
>> +             pmd++;
>> +     }
>> +
>> +     WARN_ON(page_count(pmd_page) != 1);
>> +}
>> +
>> +/**
>> + * kvm_free_stage2_pgd - free all stage-2 tables
>> + * @kvm:     The KVM struct pointer for the VM.
>> + *
>> + * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
>> + * underlying level-2 and level-3 tables before freeing the actual level-1
>> + * table and setting the struct pointer to NULL.
>> + *
>> + * Note we don't need locking here as this is only called when the VM is
>> + * destroyed, which can only be done once.
>> + */
>> +void kvm_free_stage2_pgd(struct kvm *kvm)
>> +{
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     unsigned long long i, addr;
>> +     struct page *pud_page;
>> +
>> +     if (kvm->arch.pgd == NULL)
>> +             return;
>> +
>> +     /*
>> +      * We do this slightly differently than other places, since we need
>> +      * more than 32 bits and, for instance, pgd_addr_end converts to
>> +      * unsigned long.
>> +      */
>> +     addr = 0;
>> +     for (i = 0; i < PTRS_PER_PGD2; i++) {
>> +             addr = i * (unsigned long long)PGDIR_SIZE;
>> +             pgd = kvm->arch.pgd + i;
>> +             pud = pud_offset(pgd, addr);
>> +             pud_page = virt_to_page(pud);
>> +
>> +             if (pud_none(*pud))
>> +                     continue;
>> +
>> +             BUG_ON(pud_bad(*pud));
>> +
>> +             pmd = pmd_offset(pud, addr);
>> +             free_stage2_ptes(pmd, addr);
>> +             pmd_free(NULL, pmd);
>> +             put_page(pud_page);
>> +     }
>> +
>> +     WARN_ON(page_count(pud_page) != 1);
>> +     free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
>> +     kvm->arch.pgd = NULL;
>> +}
>> +
>> +/**
>> + * stage2_clear_pte -- Clear a stage-2 PTE.
>> + * @kvm:  The VM pointer
>> + * @addr: The physical address of the PTE
>> + *
>> + * Clear a stage-2 PTE, lowering the various ref-counts. Also takes
>> + * care of invalidating the TLBs.  Must be called while holding
>> + * mmu_lock, otherwise another faulting VCPU may come in and mess
>> + * things up behind our back.
>> + */
>> +static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr)
>> +{
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     pte_t *pte;
>> +     struct page *page;
>> +
>> +     pgd = kvm->arch.pgd + pgd_index(addr);
>> +     pud = pud_offset(pgd, addr);
>> +     if (pud_none(*pud))
>> +             return;
>> +
>> +     pmd = pmd_offset(pud, addr);
>> +     if (pmd_none(*pmd))
>> +             return;
>> +
>> +     pte = pte_offset_kernel(pmd, addr);
>> +     set_pte_ext(pte, __pte(0), 0);
>> +
>> +     page = virt_to_page(pte);
>> +     put_page(page);
>> +     if (page_count(page) != 1) {
>> +             kvm_tlb_flush_vmid(kvm);
>> +             return;
>> +     }
>> +
>> +     /* Need to remove pte page */
>> +     pmd_clear(pmd);
>> +     pte_free_kernel(NULL, (pte_t *)((unsigned long)pte & PAGE_MASK));
>> +
>> +     page = virt_to_page(pmd);
>> +     put_page(page);
>> +     if (page_count(page) != 1) {
>> +             kvm_tlb_flush_vmid(kvm);
>> +             return;
>> +     }
>> +
>> +     pud_clear(pud);
>> +     pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
>> +
>> +     page = virt_to_page(pud);
>> +     put_page(page);
>> +     kvm_tlb_flush_vmid(kvm);
>> +}
>> +
>> +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
>> +                          phys_addr_t addr, const pte_t *new_pte, bool iomap)
>> +{
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     pte_t *pte, old_pte;
>> +
>> +     /* Create 2nd stage page table mapping - Level 1 */
>> +     pgd = kvm->arch.pgd + pgd_index(addr);
>> +     pud = pud_offset(pgd, addr);
>> +     if (pud_none(*pud)) {
>> +             if (!cache)
>> +                     return 0; /* ignore calls from kvm_set_spte_hva */
>> +             pmd = mmu_memory_cache_alloc(cache);
>> +             pud_populate(NULL, pud, pmd);
>> +             pmd += pmd_index(addr);
>> +             get_page(virt_to_page(pud));
>> +     } else
>> +             pmd = pmd_offset(pud, addr);
>> +
>> +     /* Create 2nd stage page table mapping - Level 2 */
>> +     if (pmd_none(*pmd)) {
>> +             if (!cache)
>> +                     return 0; /* ignore calls from kvm_set_spte_hva */
>> +             pte = mmu_memory_cache_alloc(cache);
>> +             clean_pte_table(pte);
>> +             pmd_populate_kernel(NULL, pmd, pte);
>> +             pte += pte_index(addr);
>> +             get_page(virt_to_page(pmd));
>> +     } else
>> +             pte = pte_offset_kernel(pmd, addr);
>> +
>> +     if (iomap && pte_present(*pte))
>> +             return -EFAULT;
>> +
>> +     /* Create 2nd stage page table mapping - Level 3 */
>> +     old_pte = *pte;
>> +     set_pte_ext(pte, *new_pte, 0);
>> +     if (pte_present(old_pte))
>> +             kvm_tlb_flush_vmid(kvm);
>> +     else
>> +             get_page(virt_to_page(pte));
>> +
>> +     return 0;
>> +}
>> +
>> +/**
>> + * kvm_phys_addr_ioremap - map a device range to guest IPA
>> + *
>> + * @kvm:     The KVM pointer
>> + * @guest_ipa:       The IPA at which to insert the mapping
>> + * @pa:              The physical address of the device
>> + * @size:    The size of the mapping
>> + */
>> +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>> +                       phys_addr_t pa, unsigned long size)
>> +{
>> +     phys_addr_t addr, end;
>> +     int ret = 0;
>> +     unsigned long pfn;
>> +     struct kvm_mmu_memory_cache cache = { 0, };
>> +
>> +     end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
>> +     pfn = __phys_to_pfn(pa);
>> +
>> +     for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
>> +             pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);
>> +
>> +             ret = mmu_topup_memory_cache(&cache, 2, 2);
>> +             if (ret)
>> +                     goto out;
>> +             spin_lock(&kvm->mmu_lock);
>> +             ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
>> +             spin_unlock(&kvm->mmu_lock);
>> +             if (ret)
>> +                     goto out;
>> +
>> +             pfn++;
>> +     }
>> +
>> +out:
>> +     mmu_free_memory_cache(&cache);
>> +     return ret;
>> +}
>> +
>>  int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> {
>>       return -EINVAL;
>>  }
>>
>> +static void handle_hva_to_gpa(struct kvm *kvm,
>> +                           unsigned long start,
>> +                           unsigned long end,
>> +                           void (*handler)(struct kvm *kvm,
>> +                                           gpa_t gpa, void *data),
>> +                           void *data)
>> +{
>> +     struct kvm_memslots *slots;
>> +     struct kvm_memory_slot *memslot;
>> +
>> +     slots = kvm_memslots(kvm);
>> +
>> +     /* we only care about the pages that the guest sees */
>> +     kvm_for_each_memslot(memslot, slots) {
>> +             unsigned long hva_start, hva_end;
>> +             gfn_t gfn, gfn_end;
>> +
>> +             hva_start = max(start, memslot->userspace_addr);
>> +             hva_end = min(end, memslot->userspace_addr +
>> +                                     (memslot->npages << PAGE_SHIFT));
>> +             if (hva_start >= hva_end)
>> +                     continue;
>> +
>> +             /*
>> +              * {gfn(page) | page intersects with [hva_start, hva_end)} =
>> +              * {gfn_start, gfn_start+1, ..., gfn_end-1}.
>> +              */
>> +             gfn = hva_to_gfn_memslot(hva_start, memslot);
>> +             gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
>> +
>> +             for (; gfn < gfn_end; ++gfn) {
>> +                     gpa_t gpa = gfn << PAGE_SHIFT;
>> +                     handler(kvm, gpa, data);
>> +             }
>> +     }
>> +}
>> +
>> +static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
>> +{
>> +     stage2_clear_pte(kvm, gpa);
>> +}
>> +
>> +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
>> +{
>> +     unsigned long end = hva + PAGE_SIZE;
>> +
>> +     if (!kvm->arch.pgd)
>> +             return 0;
>> +
>> +     trace_kvm_unmap_hva(hva);
>> +     handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
>> +     return 0;
>> +}
>> +
>> +int kvm_unmap_hva_range(struct kvm *kvm,
>> +                     unsigned long start, unsigned long end)
>> +{
>> +     if (!kvm->arch.pgd)
>> +             return 0;
>> +
>> +     trace_kvm_unmap_hva_range(start, end);
>> +     handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
>> +     return 0;
>> +}
>> +
>> +static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
>> +{
>> +     pte_t *pte = (pte_t *)data;
>> +
>> +     stage2_set_pte(kvm, NULL, gpa, pte, false);
>> +}
>> +
>> +
>> +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
>> +{
>> +     unsigned long end = hva + PAGE_SIZE;
>> +     pte_t stage2_pte;
>> +
>> +     if (!kvm->arch.pgd)
>> +             return;
>> +
>> +     trace_kvm_set_spte_hva(hva);
>> +     stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
>> +     handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
>> +}
>> +
>> +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
>> +{
>> +     mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>> +}
>> +
>>  unsigned long kvm_mmu_get_httbr(void)
>>  {
>>       return virt_to_phys(hyp_pgd);
>> diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
>> index f8869c1..862b2cc 100644
>> --- a/arch/arm/kvm/trace.h
>> +++ b/arch/arm/kvm/trace.h
>> @@ -39,7 +39,53 @@ TRACE_EVENT(kvm_exit,
>>       TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
>>  );
>>
>> +TRACE_EVENT(kvm_unmap_hva,
>> +     TP_PROTO(unsigned long hva),
>> +     TP_ARGS(hva),
>> +
>> +     TP_STRUCT__entry(
>> +             __field(        unsigned long,  hva             )
>> +     ),
>> +
>> +     TP_fast_assign(
>> +             __entry->hva            = hva;
>> +     ),
>> +
>> +     TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
>> +);
>> +
>> +TRACE_EVENT(kvm_unmap_hva_range,
>> +     TP_PROTO(unsigned long start, unsigned long end),
>> +     TP_ARGS(start, end),
>> +
>> +     TP_STRUCT__entry(
>> +             __field(        unsigned long,  start           )
>> +             __field(        unsigned long,  end             )
>> +     ),
>> +
>> +     TP_fast_assign(
>> +             __entry->start          = start;
>> +             __entry->end            = end;
>> +     ),
>> +
>> +     TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
>> +               __entry->start, __entry->end)
>> +);
>> +
>> +TRACE_EVENT(kvm_set_spte_hva,
>> +     TP_PROTO(unsigned long hva),
>> +     TP_ARGS(hva),
>> +
>> +     TP_STRUCT__entry(
>> +             __field(        unsigned long,  hva             )
>> +     ),
>> +
>> +     TP_fast_assign(
>> +             __entry->hva            = hva;
>> +     ),
>> +
>> +     TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
>> +);
>>
>>  #endif /* _TRACE_KVM_H */
>>
>>
>> _______________________________________________
>> kvmarm mailing list
>> kvmarm at lists.cs.columbia.edu
>> https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm


