[PATCH 08/20] KVM: arm64: Convert unmap_stage2_range() to generic page-table API
Will Deacon
will at kernel.org
Thu Jul 30 11:33:54 EDT 2020
Convert unmap_stage2_range() to use kvm_pgtable_stage2_unmap() instead
of walking the page-table directly.
Cc: Marc Zyngier <maz at kernel.org>
Cc: Quentin Perret <qperret at google.com>
Signed-off-by: Will Deacon <will at kernel.org>
---
arch/arm64/kvm/mmu.c | 58 +++++++++++++++++++++++++-------------------
1 file changed, 33 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index f6b7220412af..d2ce44183b98 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -39,6 +39,32 @@ static bool is_iomap(unsigned long flags)
return flags & KVM_S2PTE_FLAG_IS_IOMAP;
}
+/*
+ * Apply fn() to [addr, end) in stage2_pgd_addr_end() steps, releasing
+ * kvm->mmu_lock periodically. Holding it over a large region may trip
+ * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR or CONFIG_LOCKDEP and starve
+ * other vCPUs. The page tables may be freed while the lock is released, so a
+ * NULL cookie ends the walk. Evaluates to 0 or the first non-zero fn() result.
+ */
+#define stage2_apply_range_resched(kvm, addr, end, fn) \
+({ \
+	int ret = 0; /* stays 0 if the cookie is already NULL */ \
+	struct kvm *__kvm = (kvm); \
+	u64 next, __addr = (addr), __end = (end); \
+	do { \
+		void *cookie = __kvm->arch.mmu.pgt_cookie; \
+		if (!cookie) \
+			break; \
+		next = stage2_pgd_addr_end(__kvm, __addr, __end); \
+		ret = fn(cookie, __addr, next - __addr); \
+		if (ret) \
+			break; \
+		if (next != __end) \
+			cond_resched_lock(&__kvm->mmu_lock); \
+	} while (__addr = next, __addr != __end); \
+	ret; \
+})
+
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -256,8 +282,8 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
* end up writing old data to disk.
*
* This is why right after unmapping a page/section and invalidating
- * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
- * the IO subsystem will never hit in the cache.
+ * the corresponding TLBs, we flush the data cache to make sure the IO
+ * subsystem will never hit in the cache.
*
* This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
* we then fully enforce cacheability of RAM, no matter what the guest
@@ -375,36 +401,18 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* be called while holding mmu_lock (unless for freeing the stage2 pgd before
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
+ *
+ * Failures are reported via WARN_ON(); the function itself returns nothing.
*/
static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
struct kvm *kvm = mmu->kvm;
- pgd_t *pgd;
- phys_addr_t addr = start, end = start + size;
- phys_addr_t next;
+ phys_addr_t end = start + size; /* exclusive end of the range to unmap */
assert_spin_locked(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
-
- pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
- do {
- /*
- * Make sure the page table is still active, as another thread
- * could have possibly freed the page table, while we released
- * the lock.
- */
- if (!READ_ONCE(mmu->pgd))
- break;
- next = stage2_pgd_addr_end(kvm, addr, end);
- if (!stage2_pgd_none(kvm, *pgd))
- unmap_stage2_p4ds(mmu, pgd, addr, next);
- /*
- * If the range is too large, release the kvm->mmu_lock
- * to prevent starvation and lockup detector warnings.
- */
- if (next != end)
- cond_resched_lock(&kvm->mmu_lock);
- } while (pgd++, addr = next, addr != end);
+ WARN_ON(stage2_apply_range_resched(kvm, start, end,
+ kvm_pgtable_stage2_unmap)); /* a non-zero result is only warned about */
}
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
--
2.28.0.rc0.142.g3c755180ce-goog
More information about the linux-arm-kernel
mailing list