[PATCH V3] ARM: mm: make UACCESS_WITH_MEMCPY huge page aware
Nicolas Pitre
nicolas.pitre at linaro.org
Fri Oct 11 16:16:15 EDT 2013
On Fri, 11 Oct 2013, Steve Capper wrote:
> The memory pinning code in uaccess_with_memcpy.c does not check
> for HugeTLB or THP pmds, and will enter an infinite loop should
> a __copy_to_user or __clear_user occur against a huge page.
>
> This patch adds detection code for huge pages to pin_page_for_write.
> As this code can be executed in a fast path it refers to the actual
> pmds rather than the vma. If a HugeTLB or THP is found (they have
> the same pmd representation on ARM), the page table spinlock is
> taken to prevent modification whilst the page is pinned.
>
> On ARM, huge pages are only represented as pmds, thus no huge pud
> checks are performed. (For huge puds one would lock the page table
> in a similar manner as in the pmd case).
>
> Two helper functions are introduced; pmd_thp_or_huge will check
> whether or not a page is huge or transparent huge (which have the
> same pmd layout on ARM), and pmd_hugewillfault will detect whether
> or not a page fault will occur on write to the page.
>
> Running the following test (with the chunking from read_zero
> removed):
> $ dd if=/dev/zero of=/dev/null bs=10M count=1024
> Gave: 2.3 GB/s backed by normal pages,
> 2.9 GB/s backed by huge pages,
> 5.1 GB/s backed by huge pages, with page mask=HPAGE_MASK.
>
> After some discussion, it was decided not to adopt the HPAGE_MASK,
> as this would have a significant detrimental effect on the overall
> system latency due to page_table_lock being held for too long.
> This could be revisited if split huge page locks are adopted.
>
> Signed-off-by: Steve Capper <steve.capper at linaro.org>
> ---
> Changes in V3:
> pmd_hugewillfault and pmd_thp_or_huge were erroneously ommitted from
> pgtable-2level.h causing build problems. Empty stubs are added for now
> as we don't have huge page support for short descriptors.
>
> Nicolas, can I please re-add your Reviewed-by to this?
Yes.
> ---
> arch/arm/include/asm/pgtable-2level.h | 7 ++++++
> arch/arm/include/asm/pgtable-3level.h | 3 +++
> arch/arm/lib/uaccess_with_memcpy.c | 41 ++++++++++++++++++++++++++++++++---
> 3 files changed, 48 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> index f97ee02..86a659a 100644
> --- a/arch/arm/include/asm/pgtable-2level.h
> +++ b/arch/arm/include/asm/pgtable-2level.h
> @@ -181,6 +181,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
>
> #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
>
> +/*
> + * We don't have huge page support for short descriptors, for the moment
> + * define empty stubs for use by pin_page_for_write.
> + */
> +#define pmd_hugewillfault(pmd) (0)
> +#define pmd_thp_or_huge(pmd) (0)
> +
> #endif /* __ASSEMBLY__ */
>
> #endif /* _ASM_PGTABLE_2LEVEL_H */
> diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
> index 5689c18..39c54cf 100644
> --- a/arch/arm/include/asm/pgtable-3level.h
> +++ b/arch/arm/include/asm/pgtable-3level.h
> @@ -206,6 +206,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
> #define __HAVE_ARCH_PMD_WRITE
> #define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
>
> +#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
> +#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
> +
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
> #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
> diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
> index 025f742..3e58d71 100644
> --- a/arch/arm/lib/uaccess_with_memcpy.c
> +++ b/arch/arm/lib/uaccess_with_memcpy.c
> @@ -18,6 +18,7 @@
> #include <linux/hardirq.h> /* for in_atomic() */
> #include <linux/gfp.h>
> #include <linux/highmem.h>
> +#include <linux/hugetlb.h>
> #include <asm/current.h>
> #include <asm/page.h>
>
> @@ -40,7 +41,35 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
> return 0;
>
> pmd = pmd_offset(pud, addr);
> - if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
> + if (unlikely(pmd_none(*pmd)))
> + return 0;
> +
> + /*
> + * A pmd can be bad if it refers to a HugeTLB or THP page.
> + *
> + * Both THP and HugeTLB pages have the same pmd layout
> + * and should not be manipulated by the pte functions.
> + *
> + * Lock the page table for the destination and check
> + * to see that it's still huge and whether or not we will
> + * need to fault on write, or if we have a splitting THP.
> + */
> + if (unlikely(pmd_thp_or_huge(*pmd))) {
> + ptl = ¤t->mm->page_table_lock;
> + spin_lock(ptl);
> + if (unlikely(!pmd_thp_or_huge(*pmd)
> + || pmd_hugewillfault(*pmd)
> + || pmd_trans_splitting(*pmd))) {
> + spin_unlock(ptl);
> + return 0;
> + }
> +
> + *ptep = NULL;
> + *ptlp = ptl;
> + return 1;
> + }
> +
> + if (unlikely(pmd_bad(*pmd)))
> return 0;
>
> pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
> @@ -94,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
> from += tocopy;
> n -= tocopy;
>
> - pte_unmap_unlock(pte, ptl);
> + if (pte)
> + pte_unmap_unlock(pte, ptl);
> + else
> + spin_unlock(ptl);
> }
> if (!atomic)
> up_read(¤t->mm->mmap_sem);
> @@ -147,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n)
> addr += tocopy;
> n -= tocopy;
>
> - pte_unmap_unlock(pte, ptl);
> + if (pte)
> + pte_unmap_unlock(pte, ptl);
> + else
> + spin_unlock(ptl);
> }
> up_read(¤t->mm->mmap_sem);
>
> --
> 1.8.1.4
>
More information about the linux-arm-kernel
mailing list