[RFC PATCH 06/37] KVM: MMU: Move struct kvm_mmu_page to common code
David Matlack
dmatlack at google.com
Thu Dec 8 11:38:26 PST 2022

Move struct kvm_mmu_page to common code and all x86-specific fields into
kvm_mmu_page_arch.

This commit increases the size of struct kvm_mmu_page by 64 bytes on
x86_64 (184 bytes -> 248 bytes). The size of this struct can be reduced
in future commits by moving TDP MMU root fields into a separate struct
and by dynamically allocating fields only used by the Shadow MMU.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
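
[Note: a minimal, illustrative sketch, not part of the applied diff, of how
call sites look after the split: common fields such as root_refcount stay
directly on struct kvm_mmu_page, while x86-only state is reached through the
new arch member. The helper name sp_is_unsync_root is hypothetical and only
demonstrates the access pattern; it assumes a kernel-tree build with the
headers touched by this series.]

	/* Illustrative only; mirrors the sp->arch.* conversion done below. */
	#include <kvm/mmu_types.h>

	static bool sp_is_unsync_root(struct kvm_mmu_page *sp)
	{
		/* Common fields keep their existing names and placement. */
		if (!refcount_read(&sp->root_refcount))
			return false;

		/* x86-specific state now lives in struct kvm_mmu_page_arch. */
		return sp->arch.unsync || sp->arch.unsync_children;
	}
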
arch/x86/include/asm/kvm/mmu_types.h | 62 ++++++++++++++
arch/x86/include/asm/kvm_host.h | 4 -
arch/x86/kvm/mmu/mmu.c | 122 ++++++++++++++-------------
arch/x86/kvm/mmu/mmu_internal.h | 83 ------------------
arch/x86/kvm/mmu/mmutrace.h | 4 +-
arch/x86/kvm/mmu/paging_tmpl.h | 10 +--
arch/x86/kvm/mmu/tdp_mmu.c | 8 +-
arch/x86/kvm/mmu/tdp_mmu.h | 2 +-
include/kvm/mmu_types.h | 32 ++++++-
9 files changed, 167 insertions(+), 160 deletions(-)
diff --git a/arch/x86/include/asm/kvm/mmu_types.h b/arch/x86/include/asm/kvm/mmu_types.h
index 35f893ebab5a..affcb520b482 100644
--- a/arch/x86/include/asm/kvm/mmu_types.h
+++ b/arch/x86/include/asm/kvm/mmu_types.h
@@ -2,6 +2,8 @@
#ifndef __ASM_KVM_MMU_TYPES_H
#define __ASM_KVM_MMU_TYPES_H
+#include <linux/bitmap.h>
+#include <linux/list.h>
#include <linux/types.h>
/*
@@ -53,4 +55,64 @@ struct kvm_mmu_page_role_arch {
u16 passthrough:1;
};
+struct kvm_rmap_head {
+ unsigned long val;
+};
+
+struct kvm_mmu_page_arch {
+ struct hlist_node hash_link;
+
+ bool shadow_mmu_page;
+ bool unsync;
+ u8 mmu_valid_gen;
+
+ /*
+ * The shadow page can't be replaced by an equivalent huge page
+ * because it is being used to map an executable page in the guest
+ * and the NX huge page mitigation is enabled.
+ */
+ bool nx_huge_page_disallowed;
+
+ /*
+ * Stores the result of the guest translation being shadowed by each
+ * SPTE. KVM shadows two types of guest translations: nGPA -> GPA
+ * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
+ * cases the result of the translation is a GPA and a set of access
+ * constraints.
+ *
+ * The GFN is stored in the upper bits (PAGE_SHIFT) and the shadowed
+ * access permissions are stored in the lower bits. Note, for
+ * convenience and uniformity across guests, the access permissions are
+ * stored in KVM format (e.g. ACC_EXEC_MASK) not the raw guest format.
+ */
+ u64 *shadowed_translation;
+
+ unsigned int unsync_children;
+
+ /* Rmap pointers to all parent sptes. */
+ struct kvm_rmap_head parent_ptes;
+
+ DECLARE_BITMAP(unsync_child_bitmap, 512);
+
+ /*
+ * Tracks shadow pages that, if zapped, would allow KVM to create an NX
+ * huge page. A shadow page will have nx_huge_page_disallowed set but
+ * not be on the list if a huge page is disallowed for other reasons,
+ * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
+ * isn't properly aligned, etc...
+ */
+ struct list_head possible_nx_huge_page_link;
+
+#ifdef CONFIG_X86_32
+ /*
+ * Used out of the mmu-lock to avoid reading spte values while an
+ * update is in progress; see the comments in __get_spte_lockless().
+ */
+ int clear_spte_count;
+#endif
+
+ /* Number of writes since the last time traversal visited this page. */
+ atomic_t write_flooding_count;
+};
+
#endif /* !__ASM_KVM_MMU_TYPES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ebcd7a0dabef..f5743a652e10 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -329,10 +329,6 @@ union kvm_cpu_role {
};
};
-struct kvm_rmap_head {
- unsigned long val;
-};
-
struct kvm_pio_request {
unsigned long linear_rip;
unsigned long count;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 11cef930d5ed..e47f35878ab5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -350,7 +350,7 @@ static void count_spte_clear(u64 *sptep, u64 spte)
/* Ensure the spte is completely set before we increase the count */
smp_wmb();
- sp->clear_spte_count++;
+ sp->arch.clear_spte_count++;
}
static void __set_spte(u64 *sptep, u64 spte)
@@ -432,7 +432,7 @@ static u64 __get_spte_lockless(u64 *sptep)
int count;
retry:
- count = sp->clear_spte_count;
+ count = sp->arch.clear_spte_count;
smp_rmb();
spte.spte_low = orig->spte_low;
@@ -442,7 +442,7 @@ static u64 __get_spte_lockless(u64 *sptep)
smp_rmb();
if (unlikely(spte.spte_low != orig->spte_low ||
- count != sp->clear_spte_count))
+ count != sp->arch.clear_spte_count))
goto retry;
return spte.spte;
@@ -699,7 +699,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
return sp->gfn;
if (!sp->role.arch.direct)
- return sp->shadowed_translation[index] >> PAGE_SHIFT;
+ return sp->arch.shadowed_translation[index] >> PAGE_SHIFT;
return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
}
@@ -713,7 +713,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
{
if (sp_has_gptes(sp))
- return sp->shadowed_translation[index] & ACC_ALL;
+ return sp->arch.shadowed_translation[index] & ACC_ALL;
/*
* For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs,
@@ -734,7 +734,7 @@ static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
gfn_t gfn, unsigned int access)
{
if (sp_has_gptes(sp)) {
- sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
+ sp->arch.shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
return;
}
@@ -825,18 +825,18 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
* on the list if KVM is reusing an existing shadow page, i.e. if KVM
* links a shadow page at multiple points.
*/
- if (!list_empty(&sp->possible_nx_huge_page_link))
+ if (!list_empty(&sp->arch.possible_nx_huge_page_link))
return;
++kvm->stat.nx_lpage_splits;
- list_add_tail(&sp->possible_nx_huge_page_link,
+ list_add_tail(&sp->arch.possible_nx_huge_page_link,
&kvm->arch.possible_nx_huge_pages);
}
static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
bool nx_huge_page_possible)
{
- sp->nx_huge_page_disallowed = true;
+ sp->arch.nx_huge_page_disallowed = true;
if (nx_huge_page_possible)
track_possible_nx_huge_page(kvm, sp);
@@ -861,16 +861,16 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- if (list_empty(&sp->possible_nx_huge_page_link))
+ if (list_empty(&sp->arch.possible_nx_huge_page_link))
return;
--kvm->stat.nx_lpage_splits;
- list_del_init(&sp->possible_nx_huge_page_link);
+ list_del_init(&sp->arch.possible_nx_huge_page_link);
}
static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- sp->nx_huge_page_disallowed = false;
+ sp->arch.nx_huge_page_disallowed = false;
untrack_possible_nx_huge_page(kvm, sp);
}
@@ -1720,11 +1720,11 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
{
MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
- hlist_del(&sp->hash_link);
+ hlist_del(&sp->arch.hash_link);
list_del(&sp->link);
free_page((unsigned long)sp->spt);
if (!sp->role.arch.direct)
- free_page((unsigned long)sp->shadowed_translation);
+ free_page((unsigned long)sp->arch.shadowed_translation);
kmem_cache_free(mmu_page_header_cache, sp);
}
@@ -1739,13 +1739,13 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
if (!parent_pte)
return;
- pte_list_add(cache, parent_pte, &sp->parent_ptes);
+ pte_list_add(cache, parent_pte, &sp->arch.parent_ptes);
}
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
u64 *parent_pte)
{
- pte_list_remove(parent_pte, &sp->parent_ptes);
+ pte_list_remove(parent_pte, &sp->arch.parent_ptes);
}
static void drop_parent_pte(struct kvm_mmu_page *sp,
@@ -1761,7 +1761,7 @@ static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
u64 *sptep;
struct rmap_iterator iter;
- for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
+ for_each_rmap_spte(&sp->arch.parent_ptes, &iter, sptep) {
mark_unsync(sptep);
}
}
@@ -1771,9 +1771,9 @@ static void mark_unsync(u64 *spte)
struct kvm_mmu_page *sp;
sp = sptep_to_sp(spte);
- if (__test_and_set_bit(spte_index(spte), sp->unsync_child_bitmap))
+ if (__test_and_set_bit(spte_index(spte), sp->arch.unsync_child_bitmap))
return;
- if (sp->unsync_children++)
+ if (sp->arch.unsync_children++)
return;
kvm_mmu_mark_parents_unsync(sp);
}
@@ -1799,7 +1799,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
{
int i;
- if (sp->unsync)
+ if (sp->arch.unsync)
for (i=0; i < pvec->nr; i++)
if (pvec->page[i].sp == sp)
return 0;
@@ -1812,9 +1812,9 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
{
- --sp->unsync_children;
- WARN_ON((int)sp->unsync_children < 0);
- __clear_bit(idx, sp->unsync_child_bitmap);
+ --sp->arch.unsync_children;
+ WARN_ON((int)sp->arch.unsync_children < 0);
+ __clear_bit(idx, sp->arch.unsync_child_bitmap);
}
static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
@@ -1822,7 +1822,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
{
int i, ret, nr_unsync_leaf = 0;
- for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
+ for_each_set_bit(i, sp->arch.unsync_child_bitmap, 512) {
struct kvm_mmu_page *child;
u64 ent = sp->spt[i];
@@ -1833,7 +1833,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
child = spte_to_child_sp(ent);
- if (child->unsync_children) {
+ if (child->arch.unsync_children) {
if (mmu_pages_add(pvec, child, i))
return -ENOSPC;
@@ -1845,7 +1845,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
nr_unsync_leaf += ret;
} else
return ret;
- } else if (child->unsync) {
+ } else if (child->arch.unsync) {
nr_unsync_leaf++;
if (mmu_pages_add(pvec, child, i))
return -ENOSPC;
@@ -1862,7 +1862,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_mmu_pages *pvec)
{
pvec->nr = 0;
- if (!sp->unsync_children)
+ if (!sp->arch.unsync_children)
return 0;
mmu_pages_add(pvec, sp, INVALID_INDEX);
@@ -1871,9 +1871,9 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- WARN_ON(!sp->unsync);
+ WARN_ON(!sp->arch.unsync);
trace_kvm_mmu_sync_page(sp);
- sp->unsync = 0;
+ sp->arch.unsync = 0;
--kvm->stat.mmu_unsync;
}
@@ -1894,7 +1894,7 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
}
#define for_each_valid_sp(_kvm, _sp, _list) \
- hlist_for_each_entry(_sp, _list, hash_link) \
+ hlist_for_each_entry(_sp, _list, arch.hash_link) \
if (is_obsolete_sp((_kvm), (_sp))) { \
} else
@@ -1934,7 +1934,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
/* TDP MMU pages do not use the MMU generation. */
return !is_tdp_mmu_page(sp) &&
- unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+ unlikely(sp->arch.mmu_valid_gen != kvm->arch.mmu_valid_gen);
}
struct mmu_page_path {
@@ -2006,7 +2006,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
WARN_ON(idx == INVALID_INDEX);
clear_unsync_child_bit(sp, idx);
level++;
- } while (!sp->unsync_children);
+ } while (!sp->arch.unsync_children);
}
static int mmu_sync_children(struct kvm_vcpu *vcpu,
@@ -2053,7 +2053,7 @@ static int mmu_sync_children(struct kvm_vcpu *vcpu,
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
{
- atomic_set(&sp->write_flooding_count, 0);
+ atomic_set(&sp->arch.write_flooding_count, 0);
}
static void clear_sp_write_flooding_count(u64 *spte)
@@ -2094,7 +2094,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
* Unsync pages must not be left as is, because the new
* upper-level page will be write-protected.
*/
- if (role.level > PG_LEVEL_4K && sp->unsync)
+ if (role.level > PG_LEVEL_4K && sp->arch.unsync)
kvm_mmu_prepare_zap_page(kvm, sp,
&invalid_list);
continue;
@@ -2104,7 +2104,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
if (sp->role.arch.direct)
goto out;
- if (sp->unsync) {
+ if (sp->arch.unsync) {
if (KVM_BUG_ON(!vcpu, kvm))
break;
@@ -2163,25 +2163,26 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
if (!role.arch.direct)
- sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
+ sp->arch.shadowed_translation =
+ kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
- INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+ INIT_LIST_HEAD(&sp->arch.possible_nx_huge_page_link);
/*
* active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
* depends on valid pages being added to the head of the list. See
* comments in kvm_zap_obsolete_pages().
*/
- sp->mmu_valid_gen = kvm->arch.mmu_valid_gen;
+ sp->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen;
list_add(&sp->link, &kvm->arch.active_mmu_pages);
kvm_account_mmu_page(kvm, sp);
sp->gfn = gfn;
sp->role = role;
- sp->shadow_mmu_page = true;
- hlist_add_head(&sp->hash_link, sp_list);
+ sp->arch.shadow_mmu_page = true;
+ hlist_add_head(&sp->arch.hash_link, sp_list);
if (sp_has_gptes(sp))
account_shadowed(kvm, sp);
@@ -2368,7 +2369,7 @@ static void __link_shadow_page(struct kvm *kvm,
mmu_page_add_parent_pte(cache, sp, sptep);
- if (sp->unsync_children || sp->unsync)
+ if (sp->arch.unsync_children || sp->arch.unsync)
mark_unsync(sptep);
}
@@ -2421,7 +2422,8 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
* avoids retaining a large number of stale nested SPs.
*/
if (tdp_enabled && invalid_list &&
- child->role.arch.guest_mode && !child->parent_ptes.val)
+ child->role.arch.guest_mode &&
+ !child->arch.parent_ptes.val)
return kvm_mmu_prepare_zap_page(kvm, child,
invalid_list);
}
@@ -2449,7 +2451,7 @@ static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
u64 *sptep;
struct rmap_iterator iter;
- while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
+ while ((sptep = rmap_get_first(&sp->arch.parent_ptes, &iter)))
drop_parent_pte(sp, sptep);
}
@@ -2496,7 +2498,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
if (!sp->role.invalid && sp_has_gptes(sp))
unaccount_shadowed(kvm, sp);
- if (sp->unsync)
+ if (sp->arch.unsync)
kvm_unlink_unsync_page(kvm, sp);
if (!refcount_read(&sp->root_refcount)) {
/* Count self */
@@ -2527,7 +2529,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
zapped_root = !is_obsolete_sp(kvm, sp);
}
- if (sp->nx_huge_page_disallowed)
+ if (sp->arch.nx_huge_page_disallowed)
unaccount_nx_huge_page(kvm, sp);
sp->role.invalid = 1;
@@ -2704,7 +2706,7 @@ static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
trace_kvm_mmu_unsync_page(sp);
++kvm->stat.mmu_unsync;
- sp->unsync = 1;
+ sp->arch.unsync = 1;
kvm_mmu_mark_parents_unsync(sp);
}
@@ -2739,7 +2741,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
if (!can_unsync)
return -EPERM;
- if (sp->unsync)
+ if (sp->arch.unsync)
continue;
if (prefetch)
@@ -2764,7 +2766,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
* for write, i.e. unsync cannot transition from 0->1
* while this CPU holds mmu_lock for read (or write).
*/
- if (READ_ONCE(sp->unsync))
+ if (READ_ONCE(sp->arch.unsync))
continue;
}
@@ -2796,8 +2798,8 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
* 2.2 Guest issues TLB flush.
* That causes a VM Exit.
*
- * 2.3 Walking of unsync pages sees sp->unsync is
- * false and skips the page.
+ * 2.3 Walking of unsync pages sees sp->arch.unsync
+ * is false and skips the page.
*
* 2.4 Guest accesses GVA X.
* Since the mapping in the SP was not updated,
@@ -2805,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
* gets used.
* 1.1 Host marks SP
* as unsync
- * (sp->unsync = true)
+ * (sp->arch.unsync = true)
*
* The write barrier below ensures that 1.1 happens before 1.2 and thus
* the situation in 2.4 does not arise. It pairs with the read barrier
@@ -3126,7 +3128,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
cur_level == fault->goal_level &&
is_shadow_present_pte(spte) &&
!is_large_pte(spte) &&
- spte_to_child_sp(spte)->nx_huge_page_disallowed) {
+ spte_to_child_sp(spte)->arch.nx_huge_page_disallowed) {
/*
* A small SPTE exists for this pfn, but FNAME(fetch),
* direct_map(), or kvm_tdp_mmu_map() would like to create a
@@ -3902,7 +3904,7 @@ static bool is_unsync_root(hpa_t root)
/*
* The read barrier orders the CPU's read of SPTE.W during the page table
- * walk before the reads of sp->unsync/sp->unsync_children here.
+ * walk before the reads of sp->arch.{unsync,unsync_children} here.
*
* Even if another CPU was marking the SP as unsync-ed simultaneously,
* any guest page table changes are not guaranteed to be visible anyway
@@ -3922,7 +3924,7 @@ static bool is_unsync_root(hpa_t root)
if (WARN_ON_ONCE(!sp))
return false;
- if (sp->unsync || sp->unsync_children)
+ if (sp->arch.unsync || sp->arch.unsync_children)
return true;
return false;
@@ -5510,8 +5512,8 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
if (sp->role.level == PG_LEVEL_4K)
return false;
- atomic_inc(&sp->write_flooding_count);
- return atomic_read(&sp->write_flooding_count) >= 3;
+ atomic_inc(&sp->arch.write_flooding_count);
+ return atomic_read(&sp->arch.write_flooding_count) >= 3;
}
/*
@@ -6389,7 +6391,7 @@ static bool shadow_mmu_try_split_huge_pages(struct kvm *kvm,
continue;
/* SPs with level >PG_LEVEL_4K should never by unsync. */
- if (WARN_ON_ONCE(sp->unsync))
+ if (WARN_ON_ONCE(sp->arch.unsync))
continue;
/* Don't bother splitting huge pages on invalid SPs. */
@@ -6941,8 +6943,8 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
*/
sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
struct kvm_mmu_page,
- possible_nx_huge_page_link);
- WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
+ arch.possible_nx_huge_page_link);
+ WARN_ON_ONCE(!sp->arch.nx_huge_page_disallowed);
WARN_ON_ONCE(!sp->role.arch.direct);
/*
@@ -6977,7 +6979,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
else
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- WARN_ON_ONCE(sp->nx_huge_page_disallowed);
+ WARN_ON_ONCE(sp->arch.nx_huge_page_disallowed);
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index fd4990c8b0e9..af2ae4887e35 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -44,89 +44,6 @@ extern bool dbg;
#define INVALID_PAE_ROOT 0
#define IS_VALID_PAE_ROOT(x) (!!(x))
-struct kvm_mmu_page {
- /*
- * Note, "link" through "spt" fit in a single 64 byte cache line on
- * 64-bit kernels, keep it that way unless there's a reason not to.
- */
- struct list_head link;
- struct hlist_node hash_link;
-
- bool shadow_mmu_page;
- bool unsync;
- u8 mmu_valid_gen;
-
- /*
- * The shadow page can't be replaced by an equivalent huge page
- * because it is being used to map an executable page in the guest
- * and the NX huge page mitigation is enabled.
- */
- bool nx_huge_page_disallowed;
-
- /*
- * The following two entries are used to key the shadow page in the
- * hash table.
- */
- union kvm_mmu_page_role role;
- gfn_t gfn;
-
- u64 *spt;
-
- /*
- * Stores the result of the guest translation being shadowed by each
- * SPTE. KVM shadows two types of guest translations: nGPA -> GPA
- * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
- * cases the result of the translation is a GPA and a set of access
- * constraints.
- *
- * The GFN is stored in the upper bits (PAGE_SHIFT) and the shadowed
- * access permissions are stored in the lower bits. Note, for
- * convenience and uniformity across guests, the access permissions are
- * stored in KVM format (e.g. ACC_EXEC_MASK) not the raw guest format.
- */
- u64 *shadowed_translation;
-
- /* Currently serving as active root */
- refcount_t root_refcount;
-
- unsigned int unsync_children;
- union {
- struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
- tdp_ptep_t ptep;
- };
- union {
- DECLARE_BITMAP(unsync_child_bitmap, 512);
- struct {
- struct work_struct tdp_mmu_async_work;
- void *tdp_mmu_async_data;
- };
- };
-
- /*
- * Tracks shadow pages that, if zapped, would allow KVM to create an NX
- * huge page. A shadow page will have nx_huge_page_disallowed set but
- * not be on the list if a huge page is disallowed for other reasons,
- * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
- * isn't properly aligned, etc...
- */
- struct list_head possible_nx_huge_page_link;
-#ifdef CONFIG_X86_32
- /*
- * Used out of the mmu-lock to avoid reading spte values while an
- * update is in progress; see the comments in __get_spte_lockless().
- */
- int clear_spte_count;
-#endif
-
- /* Number of writes since the last time traversal visited this page. */
- atomic_t write_flooding_count;
-
-#ifdef CONFIG_X86_64
- /* Used for freeing the page asynchronously if it is a TDP MMU page. */
- struct rcu_head rcu_head;
-#endif
-};
-
extern struct kmem_cache *mmu_page_header_cache;
static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index ffd10ce3eae3..335f26dabdf3 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -16,11 +16,11 @@
__field(bool, unsync)
#define KVM_MMU_PAGE_ASSIGN(sp) \
- __entry->mmu_valid_gen = sp->mmu_valid_gen; \
+ __entry->mmu_valid_gen = sp->arch.mmu_valid_gen; \
__entry->gfn = sp->gfn; \
__entry->role = sp->role.word; \
__entry->root_count = refcount_read(&sp->root_refcount); \
- __entry->unsync = sp->unsync;
+ __entry->unsync = sp->arch.unsync;
#define KVM_MMU_PAGE_PRINTK() ({ \
const char *saved_ptr = trace_seq_buffer_ptr(p); \
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index e15ec1c473da..18bb92b70a01 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -671,7 +671,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
* KVM_REQ_MMU_SYNC is not necessary but it
* expedites the process.
*/
- if (sp->unsync_children &&
+ if (sp->arch.unsync_children &&
mmu_sync_children(vcpu, sp, false))
return RET_PF_RETRY;
}
@@ -921,7 +921,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
pt_element_t gpte;
gpa_t pte_gpa;
- if (!sp->unsync)
+ if (!sp->arch.unsync)
break;
pte_gpa = FNAME(get_level1_sp_gpa)(sp);
@@ -942,7 +942,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
}
- if (!sp->unsync_children)
+ if (!sp->arch.unsync_children)
break;
}
write_unlock(&vcpu->kvm->mmu_lock);
@@ -974,8 +974,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
}
/*
- * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is
- * safe because:
+ * Using the information in sp->arch.shadowed_translation
+ * (kvm_mmu_page_get_gfn()) is safe because:
* - The spte has a reference to the struct page, so the pfn for a given gfn
* can't change unless all sptes pointing to it are nuked first.
*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 34d674080170..66231c7ed31e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -270,7 +270,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
gfn_t gfn, union kvm_mmu_page_role role)
{
- INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+ INIT_LIST_HEAD(&sp->arch.possible_nx_huge_page_link);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
@@ -385,7 +385,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
{
tdp_unaccount_mmu_page(kvm, sp);
- if (!sp->nx_huge_page_disallowed)
+ if (!sp->arch.nx_huge_page_disallowed)
return;
if (shared)
@@ -393,7 +393,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
else
lockdep_assert_held_write(&kvm->mmu_lock);
- sp->nx_huge_page_disallowed = false;
+ sp->arch.nx_huge_page_disallowed = false;
untrack_possible_nx_huge_page(kvm, sp);
if (shared)
@@ -1181,7 +1181,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
sp = tdp_mmu_alloc_sp(vcpu);
tdp_mmu_init_child_sp(sp, &iter);
- sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
+ sp->arch.nx_huge_page_disallowed = fault->huge_page_disallowed;
if (is_shadow_present_pte(iter.old_spte))
r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 19d3153051a3..e6a929089715 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -73,7 +73,7 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
#ifdef CONFIG_X86_64
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
{
- return !sp->shadow_mmu_page;
+ return !sp->arch.shadow_mmu_page;
}
#else
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
diff --git a/include/kvm/mmu_types.h b/include/kvm/mmu_types.h
index 14099956fdac..a9da33d4baa8 100644
--- a/include/kvm/mmu_types.h
+++ b/include/kvm/mmu_types.h
@@ -3,8 +3,11 @@
#define __KVM_MMU_TYPES_H
#include <linux/bug.h>
-#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/refcount.h>
#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
#include <asm/kvm/mmu_types.h>
@@ -36,4 +39,31 @@ static_assert(sizeof(union kvm_mmu_page_role) == sizeof_field(union kvm_mmu_page
typedef u64 __rcu *tdp_ptep_t;
+struct kvm_mmu_page {
+ struct list_head link;
+
+ union kvm_mmu_page_role role;
+
+ /* The start of the GFN region mapped by this shadow page. */
+ gfn_t gfn;
+
+ /* The page table page. */
+ u64 *spt;
+
+ /* The PTE that points to this shadow page. */
+ tdp_ptep_t ptep;
+
+ /* Used for freeing TDP MMU pages asynchronously. */
+ struct rcu_head rcu_head;
+
+ /* The number of references to this shadow page as a root. */
+ refcount_t root_refcount;
+
+ /* Used for tearing down an entire page table tree. */
+ struct work_struct tdp_mmu_async_work;
+ void *tdp_mmu_async_data;
+
+ struct kvm_mmu_page_arch arch;
+};
+
#endif /* !__KVM_MMU_TYPES_H */
--
2.39.0.rc1.256.g54fd8350bd-goog