[PATCH 2/6] arm64: Convert pte handling from inline asm to using (cmp)xchg

Will Deacon <will.deacon@arm.com>
Thu Aug 17 05:59:58 PDT 2017


On Tue, Jul 25, 2017 at 02:53:04PM +0100, Catalin Marinas wrote:
> With the support for hardware updates of the access and dirty states,
> the following pte handling functions had to be implemented using
> exclusives: __ptep_test_and_clear_young(), ptep_get_and_clear(),
> ptep_set_wrprotect() and ptep_set_access_flags(). To take advantage of
> the LSE atomic instructions and also make the code cleaner, convert
> these pte functions to use the more generic cmpxchg()/xchg().
> 
> Cc: Will Deacon <will.deacon@arm.com>
> Acked-by: Mark Rutland <mark.rutland@arm.com>
> Acked-by: Steve Capper <steve.capper@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm64/include/asm/pgtable.h | 67 +++++++++++++++++-----------------------
>  arch/arm64/mm/fault.c            | 23 ++++++--------
>  2 files changed, 39 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 6eae342ced6b..4580d5992d0c 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -39,6 +39,7 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +#include <asm/cmpxchg.h>
>  #include <asm/fixmap.h>
>  #include <linux/mmdebug.h>
>  
> @@ -173,6 +174,11 @@ static inline pte_t pte_clear_rdonly(pte_t pte)
>  	return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
>  }
>  
> +static inline pte_t pte_set_rdonly(pte_t pte)
> +{
> +	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
> +}
> +
>  static inline pte_t pte_mkpresent(pte_t pte)
>  {
>  	return set_pte_bit(pte, __pgprot(PTE_VALID));
> @@ -593,20 +599,15 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
>  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
>  static inline int __ptep_test_and_clear_young(pte_t *ptep)
>  {
> -	pteval_t pteval;
> -	unsigned int tmp, res;
> +	pte_t old_pte, pte;
>  
> -	asm volatile("//	__ptep_test_and_clear_young\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	ubfx	%w3, %w0, %5, #1	// extract PTE_AF (young)\n"
> -	"	and	%0, %0, %4		// clear PTE_AF\n"
> -	"	stxr	%w1, %0, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
> -	: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
> +	do {
> +		old_pte = READ_ONCE(*ptep);
> +		pte = pte_mkold(old_pte);
> +	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
> +				 pte_val(pte)) != pte_val(old_pte));

Given that cmpxchg returns the value it read, can you avoid the READ_ONCE
on subsequent iterations of the loop? Same with the other cmpxchgs you've
added in this patch.
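
E.g. something along these lines (a sketch of the idea only, untested):

	pte_t old_pte, pte;

	pte = READ_ONCE(*ptep);
	do {
		old_pte = pte;
		pte = pte_mkold(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));

	return pte_young(pte);

On a failed cmpxchg, the value it returns is exactly the pte we just
raced against, so it can seed the next iteration directly instead of
going back through READ_ONCE.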

> -	return res;
> +	return pte_young(old_pte);
>  }
>  
>  static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
> @@ -630,17 +631,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
>  static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
>  				       unsigned long address, pte_t *ptep)
>  {
> -	pteval_t old_pteval;
> -	unsigned int tmp;
> -
> -	asm volatile("//	ptep_get_and_clear\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	stxr	%w1, xzr, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
> -
> -	return __pte(old_pteval);
> +	return __pte(xchg_relaxed(&pte_val(*ptep), 0));
>  }
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -659,21 +650,21 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
>  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
>  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
>  {
> -	pteval_t pteval;
> -	unsigned long tmp;
> -
> -	asm volatile("//	ptep_set_wrprotect\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	tst	%0, %4			// check for hw dirty (!PTE_RDONLY)\n"
> -	"	csel	%1, %3, xzr, eq		// set PTE_DIRTY|PTE_RDONLY if dirty\n"
> -	"	orr	%0, %0, %1		// if !dirty, PTE_RDONLY is already set\n"
> -	"	and	%0, %0, %5		// clear PTE_WRITE/PTE_DBM\n"
> -	"	stxr	%w1, %0, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
> -	: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
> -	: "cc");
> +	pte_t old_pte, pte;
> +
> +	do {
> +		pte = old_pte = READ_ONCE(*ptep);
> +		/*
> +		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
> +		 * clear), set the PTE_DIRTY and PTE_RDONLY bits.
> +		 */
> +		if (pte_hw_dirty(pte)) {
> +			pte = pte_mkdirty(pte);
> +			pte = pte_set_rdonly(pte);
> +		}
> +		pte = pte_wrprotect(pte);
> +	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
> +				 pte_val(pte)) != pte_val(old_pte));
>  }
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 2509e4fe6992..65ed66bb2828 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -34,6 +34,7 @@
>  #include <linux/hugetlb.h>
>  
>  #include <asm/bug.h>
> +#include <asm/cmpxchg.h>
>  #include <asm/cpufeature.h>
>  #include <asm/exception.h>
>  #include <asm/debug-monitors.h>
> @@ -154,8 +155,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
>  			  unsigned long address, pte_t *ptep,
>  			  pte_t entry, int dirty)
>  {
> -	pteval_t old_pteval;
> -	unsigned int tmp;
> +	pteval_t old_pteval, pteval;
>  
>  	if (pte_same(*ptep, entry))
>  		return 0;
> @@ -165,7 +165,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
>  
>  	/* set PTE_RDONLY if actual read-only or clean PTE */
>  	if (!pte_write(entry) || !pte_sw_dirty(entry))
> -		pte_val(entry) |= PTE_RDONLY;
> +		entry = pte_set_rdonly(entry);
>  
>  	/*
>  	 * Setting the flags must be done atomically to avoid racing with the
> @@ -174,16 +174,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
>  	 * (calculated as: a & b == ~(~a | ~b)).
>  	 */
>  	pte_val(entry) ^= PTE_RDONLY;
> -	asm volatile("//	ptep_set_access_flags\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	eor	%0, %0, %3		// negate PTE_RDONLY in *ptep\n"
> -	"	orr	%0, %0, %4		// set flags\n"
> -	"	eor	%0, %0, %3		// negate final PTE_RDONLY\n"
> -	"	stxr	%w1, %0, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
> -	: "L" (PTE_RDONLY), "r" (pte_val(entry)));
> +	do {
> +		old_pteval = READ_ONCE(pte_val(*ptep));
> +		pteval = old_pteval ^ PTE_RDONLY;
> +		pteval |= pte_val(entry);
> +		pteval ^= PTE_RDONLY;
> +	} while (cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval) !=
> +		 old_pteval);
>  
>  	flush_tlb_fix_spurious_fault(vma, address);
>  	return 1;

You haven't changed this in your patch, but how is the return value supposed
to interact with concurrent updates to the pte? Isn't the earlier pte_same
check racy?
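
For illustration only (an untested sketch, not something this patch
does): one option would be to fold the "nothing to do" test into the
update loop, so the decision is made against the same snapshot the
cmpxchg operates on, e.g.:

	pteval_t old_pteval, pteval;

	/* ... mask entry and flip its PTE_RDONLY bit as above ... */

	pteval = READ_ONCE(pte_val(*ptep));
	do {
		old_pteval = pteval;
		pteval ^= PTE_RDONLY;
		pteval |= pte_val(entry);
		pteval ^= PTE_RDONLY;
		if (pteval == old_pteval)
			return 0;	/* flags already set in this snapshot */
		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
	} while (pteval != old_pteval);

Note the test there is "flags already present" rather than pte_same(),
so the semantics would need auditing against what the callers actually
expect under concurrent updates.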

Will


