[PATCH v2 21/26] KVM: Allow for different capacities in kvm_mmu_memory_cache structs
David Matlack
dmatlack at google.com
Tue Mar 22 16:13:05 PDT 2022
On Fri, Mar 18, 2022 at 10:27 PM Anup Patel <anup at brainfault.org> wrote:
>
> On Fri, Mar 11, 2022 at 5:56 AM David Matlack <dmatlack at google.com> wrote:
> >
> > Allow the capacity of the kvm_mmu_memory_cache struct to be chosen at
> > declaration time rather than being fixed for all declarations. This will
> > be used in a follow-up commit to declare an cache in x86 with a capacity
> > of 512+ objects without having to increase the capacity of all caches in
> > KVM.
> >
> > This change requires each cache now specify its capacity at runtime,
> > since the cache struct itself no longer has a fixed capacity known at
> > compile time. To protect against someone accidentally defining a
> > kvm_mmu_memory_cache struct directly (without the extra storage), this
> > commit includes a WARN_ON() in kvm_mmu_topup_memory_cache().
> >
> > This change, unfortunately, adds some grottiness to
> > kvm_phys_addr_ioremap() in arm64, which uses a function-local (i.e.
> > stack-allocated) kvm_mmu_memory_cache struct. Since C does not allow
> > anonymous structs in functions, the new wrapper struct that contains
> > kvm_mmu_memory_cache and the objects pointer array, must be named, which
> > means dealing with an outer and inner struct. The outer struct can't be
> > dropped since then there would be no guarantee the kvm_mmu_memory_cache
> > struct and objects array would be laid out consecutively on the stack.
> >
> > No functional change intended.
> >
> > Signed-off-by: David Matlack <dmatlack at google.com>
> > ---
> > arch/arm64/include/asm/kvm_host.h | 2 +-
> > arch/arm64/kvm/arm.c | 1 +
> > arch/arm64/kvm/mmu.c | 13 +++++++++----
> > arch/mips/include/asm/kvm_host.h | 2 +-
> > arch/mips/kvm/mips.c | 2 ++
> > arch/riscv/include/asm/kvm_host.h | 2 +-
> > arch/riscv/kvm/vcpu.c | 1 +
> > arch/x86/include/asm/kvm_host.h | 8 ++++----
> > arch/x86/kvm/mmu/mmu.c | 9 +++++++++
> > include/linux/kvm_types.h | 19 +++++++++++++++++--
> > virt/kvm/kvm_main.c | 10 +++++++++-
> > 11 files changed, 55 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 5bc01e62c08a..1369415290dd 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -357,7 +357,7 @@ struct kvm_vcpu_arch {
> > bool pause;
> >
> > /* Cache some mmu pages needed inside spinlock regions */
> > - struct kvm_mmu_memory_cache mmu_page_cache;
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache);
> >
> > /* Target CPU and feature flags */
> > int target;
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index ecc5958e27fe..5e38385be0ef 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -319,6 +319,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> > vcpu->arch.target = -1;
> > bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
> >
> > + vcpu->arch.mmu_page_cache.capacity = KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
> >
> > /* Set up the timer */
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index bc2aba953299..940089ba65ad 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -765,7 +765,12 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
> > {
> > phys_addr_t addr;
> > int ret = 0;
> > - struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
> > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {
> > + .cache = {
> > + .gfp_zero = __GFP_ZERO,
> > + .capacity = KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE,
> > + },
> > + };
> > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
> > KVM_PGTABLE_PROT_R |
> > @@ -778,14 +783,14 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
> > guest_ipa &= PAGE_MASK;
> >
> > for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
> > - ret = kvm_mmu_topup_memory_cache(&cache,
> > + ret = kvm_mmu_topup_memory_cache(&page_cache.cache,
> > kvm_mmu_cache_min_pages(kvm));
> > if (ret)
> > break;
> >
> > spin_lock(&kvm->mmu_lock);
> > ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
> > - &cache);
> > + &page_cache.cache);
> > spin_unlock(&kvm->mmu_lock);
> > if (ret)
> > break;
> > @@ -793,7 +798,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
> > pa += PAGE_SIZE;
> > }
> >
> > - kvm_mmu_free_memory_cache(&cache);
> > + kvm_mmu_free_memory_cache(&page_cache.cache);
> > return ret;
> > }
> >
> > diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
> > index 717716cc51c5..935511d7fc3a 100644
> > --- a/arch/mips/include/asm/kvm_host.h
> > +++ b/arch/mips/include/asm/kvm_host.h
> > @@ -347,7 +347,7 @@ struct kvm_vcpu_arch {
> > unsigned long pending_exceptions_clr;
> >
> > /* Cache some mmu pages needed inside spinlock regions */
> > - struct kvm_mmu_memory_cache mmu_page_cache;
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache);
> >
> > /* vcpu's vzguestid is different on each host cpu in an smp system */
> > u32 vzguestid[NR_CPUS];
> > diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> > index a25e0b73ee70..45c7179144dc 100644
> > --- a/arch/mips/kvm/mips.c
> > +++ b/arch/mips/kvm/mips.c
> > @@ -387,6 +387,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> > if (err)
> > goto out_free_gebase;
> >
> > + vcpu->arch.mmu_page_cache.capacity = KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > +
> > return 0;
> >
> > out_free_gebase:
> > diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> > index 99ef6a120617..5bd4902ebda3 100644
> > --- a/arch/riscv/include/asm/kvm_host.h
> > +++ b/arch/riscv/include/asm/kvm_host.h
> > @@ -186,7 +186,7 @@ struct kvm_vcpu_arch {
> > struct kvm_sbi_context sbi_context;
> >
> > /* Cache pages needed to program page tables with spinlock held */
> > - struct kvm_mmu_memory_cache mmu_page_cache;
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache);
> >
> > /* VCPU power-off state */
> > bool power_off;
> > diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> > index 624166004e36..6a5f5aa45bac 100644
> > --- a/arch/riscv/kvm/vcpu.c
> > +++ b/arch/riscv/kvm/vcpu.c
> > @@ -94,6 +94,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> >
> > /* Mark this VCPU never ran */
> > vcpu->arch.ran_atleast_once = false;
> > + vcpu->arch.mmu_page_cache.capacity = KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
>
> There another function stage2_ioremap() which also needs to change
> because this function creates a kvm_mmu_memory_cache on stack.
Thanks for catching that. Will fix in v3.
>
> Regards,
> Anup
>
> >
> > /* Setup ISA features available to VCPU */
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 0f5a36772bdc..544dde11963b 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -692,10 +692,10 @@ struct kvm_vcpu_arch {
> > */
> > struct kvm_mmu *walk_mmu;
> >
> > - struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
> > - struct kvm_mmu_memory_cache mmu_shadow_page_cache;
> > - struct kvm_mmu_memory_cache mmu_shadowed_translation_cache;
> > - struct kvm_mmu_memory_cache mmu_page_header_cache;
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_pte_list_desc_cache);
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_shadow_page_cache);
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_shadowed_translation_cache);
> > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_header_cache);
> >
> > /*
> > * QEMU userspace and the guest each have their own FPU state.
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index dd56b5b9624f..24e7e053e05b 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -5817,12 +5817,21 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
> > {
> > int ret;
> >
> > + vcpu->arch.mmu_pte_list_desc_cache.capacity =
> > + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
> > vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO;
> >
> > + vcpu->arch.mmu_page_header_cache.capacity =
> > + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
> > vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
> >
> > + vcpu->arch.mmu_shadowed_translation_cache.capacity =
> > + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > +
> > + vcpu->arch.mmu_shadow_page_cache.capacity =
> > + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
> > vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
> >
> > vcpu->arch.mmu = &vcpu->arch.root_mmu;
> > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > index ac1ebb37a0ff..579cf39986ec 100644
> > --- a/include/linux/kvm_types.h
> > +++ b/include/linux/kvm_types.h
> > @@ -83,14 +83,29 @@ struct gfn_to_pfn_cache {
> > * MMU flows is problematic, as is triggering reclaim, I/O, etc... while
> > * holding MMU locks. Note, these caches act more like prefetch buffers than
> > * classical caches, i.e. objects are not returned to the cache on being freed.
> > + *
> > + * The storage for the cache object pointers is laid out after the struct, to
> > + * allow different declarations to choose different capacities. The capacity
> > + * field defines the number of object pointers available after the struct.
> > */
> > struct kvm_mmu_memory_cache {
> > int nobjs;
> > + int capacity;
> > gfp_t gfp_zero;
> > struct kmem_cache *kmem_cache;
> > - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
> > + void *objects[];
> > };
> > -#endif
> > +
> > +#define __DEFINE_KVM_MMU_MEMORY_CACHE(_name, _capacity) \
> > + struct { \
> > + struct kvm_mmu_memory_cache _name; \
> > + void *_name##_objects[_capacity]; \
> > + }
> > +
> > +#define DEFINE_KVM_MMU_MEMORY_CACHE(_name) \
> > + __DEFINE_KVM_MMU_MEMORY_CACHE(_name, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE)
> > +
> > +#endif /* KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE */
> >
> > #define HALT_POLL_HIST_COUNT 32
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 9581a24c3d17..1d849ba9529f 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -371,9 +371,17 @@ int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
> > {
> > void *obj;
> >
> > + /*
> > + * The capacity fieldmust be initialized since the storage for the
> > + * objects pointer array is laid out after the kvm_mmu_memory_cache
> > + * struct and not known at compile time.
> > + */
> > + if (WARN_ON(mc->capacity == 0))
> > + return -EINVAL;
> > +
> > if (mc->nobjs >= min)
> > return 0;
> > - while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
> > + while (mc->nobjs < mc->capacity) {
> > obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
> > if (!obj)
> > return mc->nobjs >= min ? 0 : -ENOMEM;
> > --
> > 2.35.1.723.g4982287a31-goog
> >
More information about the kvm-riscv
mailing list