[PATCH v5 09/14] KVM: arm64: Atomically update stage 2 leaf attributes in parallel walks

Ben Gardon bgardon at google.com
Wed Nov 9 14:26:19 PST 2022


On Mon, Nov 7, 2022 at 1:58 PM Oliver Upton <oliver.upton at linux.dev> wrote:
>
> The stage2 attr walker is already used for parallel walks. Since commit
> f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation
> during dirty logging"), KVM acquires the read lock when
> write-unprotecting a PTE. However, the walker only uses a simple store
> to update the PTE. This is safe as the only possible race is with
> hardware updates to the access flag, which is benign.
>
> However, a subsequent change to KVM will allow more changes to the stage
> 2 page tables to be done in parallel. Prepare the stage 2 attribute
> walker by performing atomic updates to the PTE when walking in parallel.
>
> Signed-off-by: Oliver Upton <oliver.upton at linux.dev>
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index d8d963521d4e..a34e2050f931 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -185,7 +185,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
>                                       kvm_pteref_t pteref, u32 level)
>  {
>         enum kvm_pgtable_walk_flags flags = data->walker->flags;
> -       kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false);
> +       kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED);
>         struct kvm_pgtable_visit_ctx ctx = {
>                 .ptep   = ptep,
>                 .old    = READ_ONCE(*ptep),
> @@ -675,6 +675,16 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
>         return !!pte;
>  }
>
> +static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
> +{
> +       if (!kvm_pgtable_walk_shared(ctx)) {
> +               WRITE_ONCE(*ctx->ptep, new);
> +               return true;
> +       }
> +
> +       return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old;
> +}
> +
>  static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
>                            struct kvm_pgtable_mm_ops *mm_ops)
>  {
> @@ -986,7 +996,9 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
>                     stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old))
>                         mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
>                                                   kvm_granule_size(ctx->level));
> -               WRITE_ONCE(*ctx->ptep, pte);
> +
> +               if (!stage2_try_set_pte(ctx, pte))
> +                       return -EAGAIN;
>         }
>
>         return 0;
> @@ -995,7 +1007,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
>  static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
>                                     u64 size, kvm_pte_t attr_set,
>                                     kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
> -                                   u32 *level)
> +                                   u32 *level, enum kvm_pgtable_walk_flags flags)
>  {
>         int ret;
>         kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
> @@ -1006,7 +1018,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
>         struct kvm_pgtable_walker walker = {
>                 .cb             = stage2_attr_walker,
>                 .arg            = &data,
> -               .flags          = KVM_PGTABLE_WALK_LEAF,
> +               .flags          = flags | KVM_PGTABLE_WALK_LEAF,
>         };
>
>         ret = kvm_pgtable_walk(pgt, addr, size, &walker);
> @@ -1025,14 +1037,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  {
>         return stage2_update_leaf_attrs(pgt, addr, size, 0,
>                                         KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
> -                                       NULL, NULL);
> +                                       NULL, NULL, 0);
>  }
>
>  kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
>  {
>         kvm_pte_t pte = 0;
>         stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
> -                                &pte, NULL);
> +                                &pte, NULL, 0);
>         dsb(ishst);
>         return pte;
>  }
> @@ -1041,7 +1053,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
>  {
>         kvm_pte_t pte = 0;
>         stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
> -                                &pte, NULL);
> +                                &pte, NULL, 0);
>         /*
>          * "But where's the TLBI?!", you scream.
>          * "Over in the core code", I sigh.
> @@ -1054,7 +1066,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
>  bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
>  {
>         kvm_pte_t pte = 0;
> -       stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
> +       stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0);

It would be nice to have an enum value such as KVM_PGTABLE_WALK_EXCLUSIVE for this,
so these call sites don't just have to pass a bare 0.


>         return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
>  }
>
> @@ -1077,7 +1089,8 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
>         if (prot & KVM_PGTABLE_PROT_X)
>                 clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
>
> -       ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
> +       ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
> +                                      KVM_PGTABLE_WALK_SHARED);
>         if (!ret)
>                 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
>         return ret;
> --
> 2.38.1.431.g37b22c650d-goog
>



More information about the linux-arm-kernel mailing list