[PATCH v2] KVM: arm64: nv: Optimize unmapping of shadow S2-MMU tables
Vishnu Pajjuri
vishnu at os.amperecomputing.com
Wed Feb 25 03:59:05 PST 2026
Hi Marc,
On 24-11-2025 16:33, Ganapatrao Kulkarni wrote:
> On 10/28/2025 5:59 PM, Marc Zyngier wrote:
>> On Tue, 28 Oct 2025 06:02:03 +0000,
>> Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com> wrote:
>>>
>>> On 10/23/2025 8:05 PM, Marc Zyngier wrote:
>>>> On Thu, 23 Oct 2025 12:11:42 +0100,
>>>> Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com> wrote:
>>>>>
>>>>>
>>>>> Hi Marc, Oliver,
>>>>>
>>>>> On 10/13/2025 12:21 PM, Ganapatrao Kulkarni wrote:
>>>>>> As of commit ec14c272408a ("KVM: arm64: nv: Unmap/flush shadow
>>>>>> stage 2 page tables"), an unmap of a canonical IPA range mapped at L1
>>>>>> triggers invalidation in the L1 S2-MMU and in all active shadow (L2)
>>>>>> S2-MMU tables. Because there is no direct mapping to locate the
>>>>>> corresponding shadow IPAs, the code falls back to a full S2-MMU
>>>>>> page-table walk and invalidation across the entire L1 address space.
>>>>>>
>>>>>> For 4K pages this causes roughly 256K loop iterations (about 8M for
>>>>>> 64K pages) per unmap, which can severely impact performance on large
>>>>>> systems and even cause soft lockups during NV (L1/L2) boots with many
>>>>>> CPUs and large memory. It also causes long delays during L1 reboot.
>>>>>>
>>>>>> This patch adds a maple-tree-based lookup that records canonical-IPA
>>>>>> to shadow-IPA mappings whenever a page is mapped into any shadow (L2)
>>>>>> table. On unmap, the lookup is used to target only those shadow IPAs
>>>>>> which are fully or partially mapped in shadow S2-MMU tables, avoiding
>>>>>> a full-address-space walk and unnecessary unmap/flush operations.
>>>>>>
>>>>>> The lookup is updated on map/unmap operations so entries remain
>>>>>> consistent with shadow table state. Use it during unmap to invalidate
>>>>>> only affected shadow IPAs, avoiding unnecessary CPU work and reducing
>>>>>> latency when shadow mappings are sparse.
>>>>>>
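>>>>>> To illustrate the intent (a simplified sketch in terms of the generic
>>>>>> <linux/maple_tree.h> API; the real hooks are in the diff below, and
>>>>>> the helper name here is made up, not part of this patch):
>>>>>>
>>>>>> /*
>>>>>>  * Sketch: unmap only the shadow ranges recorded as overlapping the
>>>>>>  * canonical range [ipa, ipa + size). mt_find() advances @index past
>>>>>>  * each entry it returns.
>>>>>>  */
>>>>>> static void sketch_unmap_overlaps(struct kvm_s2_mmu *mmu, u64 ipa,
>>>>>> 				  u64 size, bool may_block)
>>>>>> {
>>>>>> 	unsigned long index = ipa;
>>>>>> 	struct shadow_ipa_map *entry;
>>>>>>
>>>>>> 	while ((entry = mt_find(&mmu->nested_mmu_mt, &index,
>>>>>> 				ipa + size - 1))) {
>>>>>> 		kvm_stage2_unmap_range(mmu, entry->shadow_ipa,
>>>>>> 				       entry->size, may_block);
>>>>>> 		kfree(mtree_erase(&mmu->nested_mmu_mt, entry->ipa));
>>>>>> 	}
>>>>>> }
>>>>>>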
>>>>>> Reviewed-by: Christoph Lameter (Ampere) <cl at gentwo.org>
>>>>>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com>
>>>>>> ---
>>>>>>
>>>>>> Changes since v1:
>>>>>> Rebased to 6.18-rc1.
>>>>>> Fixed an alignment issue when adding the shadow IPA range
>>>>>> to the lookup.
>>>>>>
>>>>>> Changes since RFC v1:
>>>>>> Added the maple-tree-based lookup and addressed the review
>>>>>> comments from [1].
>>>>>>
>>>>>> [1] https://lkml.indiana.edu/2403.0/03801.html
>>>>>>
>>>>>>  arch/arm64/include/asm/kvm_host.h   |   3 +
>>>>>>  arch/arm64/include/asm/kvm_nested.h |   9 +++
>>>>>>  arch/arm64/kvm/mmu.c                |  17 ++--
>>>>>>  arch/arm64/kvm/nested.c             | 120 ++++++++++++++++++++++++--
>>>>>>  4 files changed, 138 insertions(+), 11 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>>>>> index b763293281c8..e774681c6ba4 100644
>>>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>>>> @@ -227,6 +227,9 @@ struct kvm_s2_mmu {
>>>>>>  	 * >0: Somebody is actively using this.
>>>>>>  	 */
>>>>>>  	atomic_t refcnt;
>>>>>> +
>>>>>> +	/* For IPA to shadow IPA lookup */
>>>>>> +	struct maple_tree nested_mmu_mt;
>>>>>>  };
>>>>>>
>>>>>>  struct kvm_arch_memory_slot {
>>>>>> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
>>>>>> index f7c06a840963..5b7c4e7ed18f 100644
>>>>>> --- a/arch/arm64/include/asm/kvm_nested.h
>>>>>> +++ b/arch/arm64/include/asm/kvm_nested.h
>>>>>> @@ -69,6 +69,8 @@ extern void kvm_init_nested(struct kvm *kvm);
>>>>>> extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
>>>>>> extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
>>>>>> extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
>>>>>> +extern int add_to_shadow_ipa_lookup(struct kvm_pgtable *pgt, u64 shadow_ipa,
>>>>>> +				    u64 ipa, u64 size);
>>>>>> union tlbi_info;
>>>>>> @@ -95,6 +97,12 @@ struct kvm_s2_trans {
>>>>>>  	u64 desc;
>>>>>>  };
>>>>>>
>>>>>> +struct shadow_ipa_map {
>>>>>> +	u64 shadow_ipa;
>>>>>> +	u64 ipa;
>>>>>> +	u64 size;
>>>>>> +};
>>>>>> +
>>>>>>  static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
>>>>>>  {
>>>>>>  	return trans->output;
>>>>>> @@ -132,6 +140,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
>>>>>>  extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
>>>>>>  extern void kvm_nested_s2_wp(struct kvm *kvm);
>>>>>>  extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block);
>>>>>> +extern void kvm_nested_s2_unmap_range(struct kvm *kvm, u64 ipa, u64 size, bool may_block);
>>>>>>  extern void kvm_nested_s2_flush(struct kvm *kvm);
>>>>>>
>>>>>>  unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
>>>>>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>>>>>> index 7cc964af8d30..27c120556e1b 100644
>>>>>> --- a/arch/arm64/kvm/mmu.c
>>>>>> +++ b/arch/arm64/kvm/mmu.c
>>>>>> @@ -1872,6 +1872,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>>>>>  		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
>>>>>>  							 __pfn_to_phys(pfn), prot,
>>>>>>  							 memcache, flags);
>>>>>> +
>>>>>> +		/* Add to lookup, if canonical IPA range mapped to shadow mmu */
>>>>>> +		if (nested)
>>>>>> +			add_to_shadow_ipa_lookup(pgt, fault_ipa, ipa, vma_pagesize);
>>>>>>  	}
>>>>>>
>>>>>>  out_unlock:
>>>>>> @@ -2094,14 +2098,15 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>>>>>>
>>>>>>  bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
>>>>>>  {
>>>>>> +	gpa_t start = range->start << PAGE_SHIFT;
>>>>>> +	gpa_t size = (range->end - range->start) << PAGE_SHIFT;
>>>>>> +	bool may_block = range->may_block;
>>>>>> +
>>>>>>  	if (!kvm->arch.mmu.pgt)
>>>>>>  		return false;
>>>>>>
>>>>>> -	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
>>>>>> -			     (range->end - range->start) << PAGE_SHIFT,
>>>>>> -			     range->may_block);
>>>>>> -
>>>>>> -	kvm_nested_s2_unmap(kvm, range->may_block);
>>>>>> +	__unmap_stage2_range(&kvm->arch.mmu, start, size, may_block);
>>>>>> +	kvm_nested_s2_unmap_range(kvm, start, size, may_block);
>>>>>>
>>>>>>  	return false;
>>>>>>  }
>>>>>> @@ -2386,7 +2391,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
>>>>>>
>>>>>>  	write_lock(&kvm->mmu_lock);
>>>>>>  	kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size, true);
>>>>>> -	kvm_nested_s2_unmap(kvm, true);
>>>>>> +	kvm_nested_s2_unmap_range(kvm, gpa, size, true);
>>>>>>  	write_unlock(&kvm->mmu_lock);
>>>>>>  }
>>>>>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>>>>>> index 7a045cad6bdf..3a7035e7526a 100644
>>>>>> --- a/arch/arm64/kvm/nested.c
>>>>>> +++ b/arch/arm64/kvm/nested.c
>>>>>> @@ -7,6 +7,7 @@
>>>>>>  #include <linux/bitfield.h>
>>>>>>  #include <linux/kvm.h>
>>>>>>  #include <linux/kvm_host.h>
>>>>>> +#include <linux/maple_tree.h>
>>>>>>  #include <asm/fixmap.h>
>>>>>>  #include <asm/kvm_arm.h>
>>>>>> @@ -725,6 +726,7 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
>>>>>>  	mmu->tlb_vttbr = VTTBR_CNP_BIT;
>>>>>>  	mmu->nested_stage2_enabled = false;
>>>>>>  	atomic_set(&mmu->refcnt, 0);
>>>>>> +	mt_init_flags(&mmu->nested_mmu_mt, MM_MT_FLAGS);
>>>>>>  }
>>>>>> void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
>>>>>> @@ -1067,6 +1069,99 @@ void kvm_nested_s2_wp(struct kvm *kvm)
>>>>>>  	kvm_invalidate_vncr_ipa(kvm, 0, BIT(kvm->arch.mmu.pgt->ia_bits));
>>>>>>  }
>>>>>> +/*
>>>>>> + * Store a range of canonical IPA mapped into a nested stage 2 mmu
>>>>>> + * table. The canonical IPA is used as the pivot in the maple tree,
>>>>>> + * so the entry can be looked up later on IPA unmap/flush.
>>>>>> + */
>>>>>> +int add_to_shadow_ipa_lookup(struct kvm_pgtable *pgt, u64 shadow_ipa,
>>>>>> +			     u64 ipa, u64 size)
>>>>>> +{
>>>>>> +	struct kvm_s2_mmu *mmu;
>>>>>> +	struct shadow_ipa_map *entry;
>>>>>> +	unsigned long start, end;
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	start = ALIGN_DOWN(ipa, size);
>>>>>> +	end = start + size;
>>>>>> +	mmu = pgt->mmu;
>>>>>> +
>>>>>> +	entry = kzalloc(sizeof(struct shadow_ipa_map), GFP_KERNEL_ACCOUNT);
>>>>>> +	if (!entry)
>>>>>> +		return -ENOMEM;
>>>>>> +
>>>>>> +	entry->ipa = start;
>>>>>> +	entry->shadow_ipa = ALIGN_DOWN(shadow_ipa, size);
>>>>>> +	entry->size = size;
>>>>>> +	ret = mtree_store_range(&mmu->nested_mmu_mt, start, end - 1, entry,
>>>>>> +				GFP_KERNEL_ACCOUNT);
>>>>>> +	if (ret) {
>>>>>> +		kfree(entry);
>>>>>> +		WARN_ON(ret);
>>>>>> +	}
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static void nested_mtree_erase(struct maple_tree *mt, unsigned long start,
>>>>>> +			       unsigned long size)
>>>>>> +{
>>>>>> +	void *entry = NULL;
>>>>>> +
>>>>>> +	MA_STATE(mas, mt, start, start + size - 1);
>>>>>> +
>>>>>> +	mtree_lock(mt);
>>>>>> +	entry = mas_erase(&mas);
>>>>>> +	mtree_unlock(mt);
>>>>>> +	kfree(entry);
>>>>>> +}
>>>>>> +
>>>>>> +static void nested_mtree_erase_and_unmap_all(struct kvm_s2_mmu *mmu,
>>>>>> +					     unsigned long start,
>>>>>> +					     unsigned long end, bool may_block)
>>>>>> +{
>>>>>> +	struct shadow_ipa_map *entry;
>>>>>> +
>>>>>> +	mt_for_each(&mmu->nested_mmu_mt, entry, start, kvm_phys_size(mmu)) {
>>>>>> +		kvm_stage2_unmap_range(mmu, entry->shadow_ipa, entry->size,
>>>>>> +				       may_block);
>>>>>> +		kfree(entry);
>>>>>> +	}
>>>>>> +
>>>>>> +	mtree_destroy(&mmu->nested_mmu_mt);
>>>>>> +	mt_init_flags(&mmu->nested_mmu_mt, MM_MT_FLAGS);
>>>>>> +}
>>>>>> +
>>>>>> +void kvm_nested_s2_unmap_range(struct kvm *kvm, u64 ipa, u64 size,
>>>>>> +			       bool may_block)
>>>>>> +{
>>>>>> +	int i;
>>>>>> +	struct shadow_ipa_map *entry;
>>>>>> +
>>>>>> +	lockdep_assert_held_write(&kvm->mmu_lock);
>>>>>> +
>>>>>> +	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
>>>>>> +		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
>>>>>> +		unsigned long start = ipa;
>>>>>> +		unsigned long end = ipa + size;
>>>>>> +
>>>>>> +		if (!kvm_s2_mmu_valid(mmu))
>>>>>> +			continue;
>>>>>> +
>>>>>> +		do {
>>>>>> +			entry = mt_find(&mmu->nested_mmu_mt, &start, end - 1);
>>>>>> +			if (!entry)
>>>>>> +				break;
>>>>>> +
>>>>>> +			kvm_stage2_unmap_range(mmu, entry->shadow_ipa,
>>>>>> +					       entry->size, may_block);
>>>>>> +			start = entry->ipa + entry->size;
>>>>>> +			nested_mtree_erase(&mmu->nested_mmu_mt, entry->ipa,
>>>>>> +					   entry->size);
>>>>>> +		} while (start < end);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>>  void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
>>>>>>  {
>>>>>>  	int i;
>>>>>> @@ -1076,8 +1171,10 @@ void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
>>>>>>  	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
>>>>>>  		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
>>>>>>
>>>>>> -		if (kvm_s2_mmu_valid(mmu))
>>>>>> -			kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
>>>>>> +		if (!kvm_s2_mmu_valid(mmu))
>>>>>> +			continue;
>>>>>> +
>>>>>> +		nested_mtree_erase_and_unmap_all(mmu, 0, kvm_phys_size(mmu), may_block);
>>>>>>  	}
>>>>>>
>>>>>>  	kvm_invalidate_vncr_ipa(kvm, 0, BIT(kvm->arch.mmu.pgt->ia_bits));
>>>>>> @@ -1091,9 +1188,14 @@ void kvm_nested_s2_flush(struct kvm *kvm)
>>>>>>
>>>>>>  	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
>>>>>>  		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
>>>>>> +		struct shadow_ipa_map *entry;
>>>>>> +		unsigned long start = 0;
>>>>>>
>>>>>> -		if (kvm_s2_mmu_valid(mmu))
>>>>>> -			kvm_stage2_flush_range(mmu, 0, kvm_phys_size(mmu));
>>>>>> +		if (!kvm_s2_mmu_valid(mmu))
>>>>>> +			continue;
>>>>>> +
>>>>>> +		mt_for_each(&mmu->nested_mmu_mt, entry, start, kvm_phys_size(mmu))
>>>>>> +			kvm_stage2_flush_range(mmu, entry->shadow_ipa, entry->size);
>>>>>>  	}
>>>>>>  }
>>>>>> @@ -1104,8 +1206,16 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
>>>>>>
>>>>>>  	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
>>>>>>  		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
>>>>>>
>>>>>> -		if (!WARN_ON(atomic_read(&mmu->refcnt)))
>>>>>> +		if (!WARN_ON(atomic_read(&mmu->refcnt))) {
>>>>>> +			struct shadow_ipa_map *entry;
>>>>>> +			unsigned long start = 0;
>>>>>> +
>>>>>>  			kvm_free_stage2_pgd(mmu);
>>>>>> +
>>>>>> +			mt_for_each(&mmu->nested_mmu_mt, entry, start, kvm_phys_size(mmu))
>>>>>> +				kfree(entry);
>>>>>> +			mtree_destroy(&mmu->nested_mmu_mt);
>>>>>> +		}
>>>>>>  	}
>>>>>>  	kvfree(kvm->arch.nested_mmus);
>>>>>>  	kvm->arch.nested_mmus = NULL;
>>>>>
>>>>> Any review comments or suggestions for this patch?
>>>>
>>>> None. This patch is obviously lacking the basic requirements that such
>>>> an "optimisation" should handle, such as dealing with multiple
>>>> mappings to the same IPA in the shadow S2, hence will happily fail to
>>>> correctly unmap stuff. There is no documentation, and no test.
>>>>
>>> Thanks for the comment.
>>> How about adding a list of the multiple mapping ranges to the maple
>>> tree entry/node when adding to the lookup, and later unmapping every
>>> range present in that list? Roughly as sketched below.
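>>>
>>> Something like this rough sketch (struct and field names are made up,
>>> not from the posted patch):
>>>
>>> struct shadow_ipa_map {
>>> 	u64 ipa;			/* canonical IPA, the maple tree pivot */
>>> 	u64 size;
>>> 	struct list_head shadow_list;	/* all shadow ranges mapping this IPA */
>>> };
>>>
>>> struct shadow_ipa_range {
>>> 	u64 shadow_ipa;
>>> 	u64 size;
>>> 	struct list_head node;		/* linked on shadow_list */
>>> };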
>>
>> How will that work when the various mappings don't have the same size?
>
> Can we merge them into a new pivot?
> Say the old IPA range runs from A to B and the newer range from C to D
> (intersecting partially or fully). Then we can create a new pivot
> (delete the old one and insert a new one) covering the range MIN(A,C)
> to MAX(B,D). The new pivot will carry the list of entries for both the
> old and the new shadow IPA ranges. This is repeated until all
> intersecting pivots are merged.
>
> While unmapping, on the first intersection detected, unmap all the
> listed shadow IPAs.
I'm Vishnu. Following Ganapat's departure from Ampere, I have taken over
his outstanding patch and would appreciate your guidance on the approach
proposed above; I have tried to make the merge idea concrete in the
sketch below. Do you have any recommendations or alternatives to suggest?
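
A rough, untested sketch of how intersecting pivots could be coalesced on
insert. To be clear about what is assumed: sketch_insert_merged() is a
made-up helper, and shadow_list/shadow_ipa_range are the per-entry list
from the proposal above, not fields in the current patch.

/*
 * Rough, untested sketch: coalesce every recorded range intersecting
 * [new->ipa, new->ipa + new->size) into one pivot spanning
 * MIN(A,C)..MAX(B,D), carrying the shadow ranges of all merged
 * entries. Stored ranges never overlap one another in a maple tree,
 * so a single forward scan suffices.
 */
static int sketch_insert_merged(struct maple_tree *mt,
				struct shadow_ipa_map *new)
{
	unsigned long index = new->ipa;
	unsigned long start = new->ipa;
	unsigned long last = new->ipa + new->size - 1;
	struct shadow_ipa_map *old;

	while ((old = mt_find(mt, &index, last))) {
		start = min_t(unsigned long, start, old->ipa);
		last = max_t(unsigned long, last, old->ipa + old->size - 1);
		/* Move the old entry's shadow ranges onto the new entry */
		list_splice_tail_init(&old->shadow_list, &new->shadow_list);
		mtree_erase(mt, old->ipa);
		kfree(old);
	}

	new->ipa = start;
	new->size = last - start + 1;
	return mtree_store_range(mt, start, last, new, GFP_KERNEL_ACCOUNT);
}

On unmap, the first mt_find() hit on the canonical range would then hand
back every shadow range that needs invalidating in one go.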
Regards,
-Vishnu.
>>
>>> I tested this patch on an AmpereOne system (2 NUMA nodes, 96 CPUs
>>> per node, NUMA balancing enabled) with large vCPU counts and large
>>> memory assigned to L1 and L2. The current full-address-space walk
>>> caused very large unmap/flush work and significant delays (exacerbated
>>> by NUMA balancing / page migration activity). The targeted unmap using
>>> the lookup removes only the affected mappings and reduces the unmap
>>> latency substantially in our workloads.
>>
>> I really don't care how badly things perform. The important thing is
>> that this is architecturally correct, while your approach isn't.
>>
>>> I booted multiple L1s, each hosting several L2s, and observed no
>>> panics or failures related to missing support for multiple-IPA
>>> mappings.
>>
>> I'm sorry, but Linux isn't a validation tool for the architecture. You
>> have clearly designed something around Linux's own behaviour, not the
>> architectural requirements.
>>
>>> If you have any test cases or scenarios that would validate support
>>> for multiple IPA mappings, could you please share them?
>>
>> The onus is on *you* to provide them, not me.
>>
> Using the kvm selftest code below, I could generate two shadow IPAs
> mapping to the same canonical IPA (when run from the L1 shell).
> The test creates two shadow IPA ranges of the same size (PAGE_SIZE),
> both mapped to the same canonical IPA.
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <errno.h>
> #include <sys/mman.h>
> #include <linux/atomic.h>
> #include <linux/sizes.h>
>
> #include "kvm_util.h"
> #include "test_util.h"
> #include "processor.h"
> #include "ucall_common.h"
>
> #define PAGE_SIZE 4096
>
> static void guest_code(uint64_t slot1_gpa, uint64_t slot2_gpa,
> 		       uint64_t size, uint64_t page_size)
> {
> 	uint64_t gpa;
>
> 	for (gpa = slot1_gpa; gpa < slot1_gpa + size; gpa += page_size)
> 		vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
>
> 	for (gpa = slot2_gpa; gpa < slot2_gpa + size; gpa += page_size)
> 		vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
>
> 	/* Write to slot 1 in a loop so the host can migrate pages meanwhile */
> 	while (1) {
> 		for (gpa = slot1_gpa; gpa < slot1_gpa + size; gpa += page_size)
> 			vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
> 	}
>
> 	/* Not reached; the host terminates the test */
> 	GUEST_PRINTF("Guest DONE\n");
> 	GUEST_DONE();
> }
>
> static void run_vcpu(struct kvm_vcpu *vcpu)
> {
> struct ucall uc;
>
> do {
> vcpu_run(vcpu);
> switch (get_ucall(vcpu, &uc)) {
> case UCALL_ABORT:
> REPORT_GUEST_ASSERT(uc);
> break;
> case UCALL_PRINTF:
> printf("%s", uc.buffer);
> break;
> case UCALL_DONE:
> break;
> default:
> TEST_FAIL("Unknown ucall %lu", uc.cmd);
> }
> } while (uc.cmd != UCALL_DONE);
> }
>
> int main(int argc, char *argv[])
> {
> 	uint64_t gpa, slot_size, slot_stride, i;
> 	int slot, fd;
> 	struct kvm_vcpu *vcpu;
> 	struct kvm_vm *vm;
> 	void *mem;
> 	int first_slot, nr_slots;
> 	uint64_t start_gpa;
>
> 	slot_size = SZ_1M * 128;
> 	slot_stride = SZ_2G;
> 	start_gpa = SZ_4G;
> 	first_slot = 1;
> 	nr_slots = 2;
>
> 	vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, 1,
> 				    (nr_slots * slot_size) / PAGE_SIZE,
> 				    guest_code, &vcpu);
>
> 	/* Back both slots with the same host memory */
> 	fd = kvm_memfd_alloc(slot_size, false);
> 	mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> 	TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
>
> 	for (slot = first_slot; slot < first_slot + nr_slots; slot++) {
> 		gpa = start_gpa + ((slot - first_slot) * slot_stride);
>
> 		vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem);
> 		for (i = 0; i < slot_size; i += vm->page_size)
> 			virt_pg_map(vm, gpa + i, gpa + i);
> 	}
>
> 	vcpu_args_set(vcpu, 4, start_gpa, start_gpa + SZ_2G, slot_size,
> 		      vm->page_size);
> 	run_vcpu(vcpu);
> }
>
> From the host, I tried triggering the L1 crash seen with PATCH v2 by
> forcing page migration with the migratepages command.