[PATCH v5 08/14] KVM: arm64: Protect stage-2 traversal with RCU
Ben Gardon
bgardon at google.com
Wed Nov 9 14:25:38 PST 2022
On Mon, Nov 7, 2022 at 1:57 PM Oliver Upton <oliver.upton at linux.dev> wrote:
>
> Use RCU to safely walk the stage-2 page tables in parallel. Acquire and
> release the RCU read lock when traversing the page tables. Defer the
> freeing of table memory to an RCU callback. Indirect the calls into RCU
> and provide stubs for hypervisor code, as RCU is not available in such a
> context.
>
> The RCU protection doesn't amount to much at the moment, as readers are
> already protected by the read-write lock (all walkers that free table
> memory take the write lock). Nonetheless, a subsequent change will
> further relax the locking requirements around the stage-2 MMU, thereby
> depending on RCU.
>
> Signed-off-by: Oliver Upton <oliver.upton at linux.dev>
> ---
> arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++
> arch/arm64/kvm/hyp/pgtable.c | 10 +++++-
> arch/arm64/kvm/mmu.c | 14 +++++++-
> 3 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index e70cf57b719e..7634b6964779 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0)
>
> typedef u64 kvm_pte_t;
>
> +/*
> + * RCU cannot be used in a non-kernel context such as the hyp. As such, page
> + * table walkers used in hyp do not call into RCU and instead use other
> + * synchronization mechanisms (such as a spinlock).
> + */
> +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
> +
> typedef kvm_pte_t *kvm_pteref_t;
>
> static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
> @@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
> return pteref;
> }
>
> +static inline void kvm_pgtable_walk_begin(void) {}
> +static inline void kvm_pgtable_walk_end(void) {}
> +
> +static inline bool kvm_pgtable_walk_lock_held(void)
> +{
> + return true;
Forgive my ignorance, but does hyp not use an MMU lock at all? Seems
like this would be a good place to add a lockdep check.
> +}
> +
> +#else
> +
> +typedef kvm_pte_t __rcu *kvm_pteref_t;
> +
> +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
> +{
> + return rcu_dereference_check(pteref, !shared);
Same here, could add a lockdep check depending on shared.
> +}
> +
> +static inline void kvm_pgtable_walk_begin(void)
> +{
> + rcu_read_lock();
> +}
> +
> +static inline void kvm_pgtable_walk_end(void)
> +{
> + rcu_read_unlock();
> +}
> +
> +static inline bool kvm_pgtable_walk_lock_held(void)
> +{
> + return rcu_read_lock_held();
Likewise could do some lockdep here.
> +}
> +
> +#endif
> +
> #define KVM_PTE_VALID BIT(0)
>
> #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
> @@ -202,11 +243,14 @@ struct kvm_pgtable {
> * children.
> * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
> * children.
> + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
> + * with other software walkers.
> */
> enum kvm_pgtable_walk_flags {
> KVM_PGTABLE_WALK_LEAF = BIT(0),
> KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
> KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
> + KVM_PGTABLE_WALK_SHARED = BIT(3),
Not sure if necessary, but it might pay to have 3 shared options:
exclusive, shared mmu lock, and no mmu lock, in case we ever want
lockless fast page faults.
> };
>
> struct kvm_pgtable_visit_ctx {
> @@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx {
> typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
> enum kvm_pgtable_walk_flags visit);
>
> +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
> +{
> + return ctx->flags & KVM_PGTABLE_WALK_SHARED;
> +}
> +
> /**
> * struct kvm_pgtable_walker - Hook into a page-table walk.
> * @cb: Callback function to invoke during the walk.
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 7c9782347570..d8d963521d4e 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
> enum kvm_pgtable_walk_flags visit)
> {
> struct kvm_pgtable_walker *walker = data->walker;
> +
> + /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
> + WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
> return walker->cb(ctx, visit);
> }
>
> @@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
> .end = PAGE_ALIGN(walk_data.addr + size),
> .walker = walker,
> };
> + int r;
> +
> + kvm_pgtable_walk_begin();
> + r = _kvm_pgtable_walk(pgt, &walk_data);
> + kvm_pgtable_walk_end();
>
> - return _kvm_pgtable_walk(pgt, &walk_data);
> + return r;
> }
>
> struct leaf_walk_data {
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 73ae908eb5d9..52e042399ba5 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
>
> static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
>
> +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
> +{
> + struct page *page = container_of(head, struct page, rcu_head);
> + void *pgtable = page_to_virt(page);
> + u32 level = page_private(page);
> +
> + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
> +}
> +
> static void stage2_free_removed_table(void *addr, u32 level)
> {
> - kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
> + struct page *page = virt_to_page(addr);
> +
> + set_page_private(page, (unsigned long)level);
> + call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
> }
>
> static void kvm_host_get_page(void *addr)
> --
> 2.38.1.431.g37b22c650d-goog
>
More information about the linux-arm-kernel
mailing list