[PATCH v4 10/21] KVM: arm64: Add support for stage-2 page-aging in generic page-table

Alexandru Elisei alexandru.elisei at arm.com
Tue Sep 8 11:30:15 EDT 2020


Hi Will,

The patch looks good to me, I have a question below.

On 9/7/20 4:23 PM, Will Deacon wrote:
> Add stage-2 mkyoung(), mkold() and is_young() operations to the generic
> page-table code.
>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Quentin Perret <qperret at google.com>
> Reviewed-by: Gavin Shan <gshan at redhat.com>
> Signed-off-by: Will Deacon <will at kernel.org>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 38 ++++++++++++
>  arch/arm64/kvm/hyp/pgtable.c         | 86 ++++++++++++++++++++++++++++
>  2 files changed, 124 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 7258966d3daa..01cad6bbc81b 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -175,6 +175,44 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>   */
>  int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
>  
> +/**
> + * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
> + * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
> + * @addr:	Intermediate physical address to identify the page-table entry.
> + *
> + * If there is a valid, leaf page-table entry used to translate @addr, then
> + * set the access flag in that entry.
> + *
> + * Return: The old page-table entry prior to setting the flag, 0 on failure.
> + */
> +kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
> +
> +/**
> + * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
> + * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
> + * @addr:	Intermediate physical address to identify the page-table entry.
> + *
> + * If there is a valid, leaf page-table entry used to translate @addr, then
> + * clear the access flag in that entry.
> + *
> + * Note that it is the caller's responsibility to invalidate the TLB after
> + * calling this function to ensure that the updated permissions are visible
> + * to the CPUs.
> + *
> + * Return: The old page-table entry prior to clearing the flag, 0 on failure.
> + */
> +kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
> +
> +/**
> + * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
> + *				   access flag set.
> + * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
> + * @addr:	Intermediate physical address to identify the page-table entry.
> + *
> + * Return: True if the page-table entry has the access flag set, false otherwise.
> + */
> +bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
> +
>  /**
>   * kvm_pgtable_walk() - Walk a page-table.
>   * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 4623380cf9de..87ff56d8bcb5 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -690,6 +690,92 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  	return kvm_pgtable_walk(pgt, addr, size, &walker);
>  }
>  
> +struct stage2_attr_data {
> +	kvm_pte_t	attr_set;
> +	kvm_pte_t	attr_clr;
> +	kvm_pte_t	pte;
> +};
> +
> +static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> +			      enum kvm_pgtable_walk_flags flag,
> +			      void * const arg)
> +{
> +	kvm_pte_t pte = *ptep;
> +	struct stage2_attr_data *data = arg;
> +
> +	if (!kvm_pte_valid(pte))
> +		return 0;
> +
> +	data->pte = pte;
> +	pte &= ~data->attr_clr;
> +	pte |= data->attr_set;
> +
> +	/*
> +	 * We may race with the CPU trying to set the access flag here,
> +	 * but worst-case the access flag update gets lost and will be
> +	 * set on the next access instead.
> +	 */
> +	if (data->pte != pte)
> +		WRITE_ONCE(*ptep, pte);
> +
> +	return 0;
> +}
> +
> +static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
> +				    u64 size, kvm_pte_t attr_set,
> +				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte)
> +{
> +	int ret;
> +	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
> +	struct stage2_attr_data data = {
> +		.attr_set	= attr_set & attr_mask,
> +		.attr_clr	= attr_clr & attr_mask,
> +	};
> +	struct kvm_pgtable_walker walker = {
> +		.cb		= stage2_attr_walker,
> +		.arg		= &data,
> +		.flags		= KVM_PGTABLE_WALK_LEAF,
> +	};
> +
> +	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
> +	if (ret)
> +		return ret;
> +
> +	if (orig_pte)
> +		*orig_pte = data.pte;
> +	return 0;
> +}
> +
> +kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
> +{
> +	kvm_pte_t pte = 0;
> +	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
> +				 &pte);
> +	dsb(ishst);

I am curious about the DSB above. We don't do it when we clear the AF bit, because
in the clear_flush_young() mmu notifier, kvm_age_hva() is followed by
kvm_flush_remote_tlbs() -> __kvm_tlb_flush_vmid(), which does a DSB ISHST.

When AF is zero, the entry is not allowed to be stored in a TLB, and that's why we
don't need to issue a TLBI instruction. Do we do the DSB here to make sure all PEs
in the inner shareability domain see the updated translation tables? Or is there
another reason I'm not seeing?

Thanks,
Alex
> +	return pte;
> +}
> +
> +kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
> +{
> +	kvm_pte_t pte = 0;
> +	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
> +				 &pte);
> +	/*
> +	 * "But where's the TLBI?!", you scream.
> +	 * "Over in the core code", I sigh.
> +	 *
> +	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
> +	 */
> +	return pte;
> +}
> +
> +bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
> +{
> +	kvm_pte_t pte = 0;
> +	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte);
> +	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
> +}
> +
>  int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
>  {
>  	size_t pgd_sz;



More information about the linux-arm-kernel mailing list