[PATCH v3 2/3] arm64: mte: handle tags zeroing at page allocation time
Andrey Konovalov
andreyknvl at gmail.com
Tue May 25 15:00:53 PDT 2021
On Wed, May 12, 2021 at 11:09 PM Peter Collingbourne <pcc at google.com> wrote:
>
> Currently, on an anonymous page fault, the kernel allocates a zeroed
> page and maps it in user space. If the mapping is tagged (PROT_MTE),
> set_pte_at() additionally clears the tags. It is, however, more
> efficient to clear the tags at the same time as zeroing the data on
> allocation. To avoid clearing the tags on pages that may not be
> mapped as tagged, only do this if the vma flags contain VM_MTE. This
> requires introducing a new GFP flag that is used to determine whether
> to clear the tags.
>
> The DC GZVA instruction with a 0 top byte (and 0 tag) requires
> top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
> whether KASAN_HW is enabled.
>
> Signed-off-by: Peter Collingbourne <pcc at google.com>
> Co-developed-by: Catalin Marinas <catalin.marinas at arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas at arm.com>
> Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26
> Reviewed-by: Catalin Marinas <catalin.marinas at arm.com>
> ---
> v2:
> - remove want_zero_tags_on_free()
>
> arch/arm64/include/asm/mte.h | 4 ++++
> arch/arm64/include/asm/page.h | 9 +++++++--
> arch/arm64/lib/mte.S | 20 ++++++++++++++++++++
> arch/arm64/mm/fault.c | 25 +++++++++++++++++++++++++
> arch/arm64/mm/proc.S | 10 +++++++---
> include/linux/gfp.h | 9 +++++++--
> include/linux/highmem.h | 8 ++++++++
> mm/kasan/hw_tags.c | 9 ++++++++-
> mm/page_alloc.c | 13 ++++++++++---
> 9 files changed, 96 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index bc88a1ced0d7..67bf259ae768 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -37,6 +37,7 @@ void mte_free_tag_storage(char *storage);
> /* track which pages have valid allocation tags */
> #define PG_mte_tagged PG_arch_2
>
> +void mte_zero_clear_page_tags(void *addr);
> void mte_sync_tags(pte_t *ptep, pte_t pte);
> void mte_copy_page_tags(void *kto, const void *kfrom);
> void mte_thread_init_user(void);
> @@ -53,6 +54,9 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
> /* unused if !CONFIG_ARM64_MTE, silence the compiler */
> #define PG_mte_tagged 0
>
> +static inline void mte_zero_clear_page_tags(void *addr)
> +{
> +}
> static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
> {
> }
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 012cffc574e8..448e14071d13 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -13,6 +13,7 @@
> #ifndef __ASSEMBLY__
>
> #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
> +#include <linux/types.h> /* for gfp_t */
> #include <asm/pgtable-types.h>
>
> struct page;
> @@ -28,10 +29,14 @@ void copy_user_highpage(struct page *to, struct page *from,
> void copy_highpage(struct page *to, struct page *from);
> #define __HAVE_ARCH_COPY_HIGHPAGE
>
> -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
> - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
> +struct page *__alloc_zeroed_user_highpage(gfp_t movableflags,
> + struct vm_area_struct *vma,
> + unsigned long vaddr);
> #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
>
> +void tag_clear_highpage(struct page *to);
> +#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
> +
> #define clear_user_page(page, vaddr, pg) clear_page(page)
> #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
>
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index 351537c12f36..e83643b3995f 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -36,6 +36,26 @@ SYM_FUNC_START(mte_clear_page_tags)
> ret
> SYM_FUNC_END(mte_clear_page_tags)
>
> +/*
> + * Zero the page and tags at the same time
> + *
> + * Parameters:
> + * x0 - address of the beginning of the page
> + */
> +SYM_FUNC_START(mte_zero_clear_page_tags)
> + mrs x1, dczid_el0
> + and w1, w1, #0xf
> + mov x2, #4
> + lsl x1, x2, x1
> + and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag
> +
> +1: dc gzva, x0
> + add x0, x0, x1
> + tst x0, #(PAGE_SIZE - 1)
> + b.ne 1b
> + ret
> +SYM_FUNC_END(mte_zero_clear_page_tags)
> +
> /*
> * Copy the tags from the source page to the destination one
> * x0 - address of the destination page
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 871c82ab0a30..8127e0c0b8fb 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -921,3 +921,28 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
> debug_exception_exit(regs);
> }
> NOKPROBE_SYMBOL(do_debug_exception);
> +
> +/*
> + * Used during anonymous page fault handling.
> + */
> +struct page *__alloc_zeroed_user_highpage(gfp_t flags,
> + struct vm_area_struct *vma,
> + unsigned long vaddr)
> +{
> + /*
> + * If the page is mapped with PROT_MTE, initialise the tags at the
> + * point of allocation and page zeroing as this is usually faster than
> + * separate DC ZVA and STGM.
> + */
> + if (vma->vm_flags & VM_MTE)
> + flags |= __GFP_ZEROTAGS;
> +
> + return alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | flags, vma, vaddr);
> +}
> +
> +void tag_clear_highpage(struct page *page)
> +{
> + mte_zero_clear_page_tags(page_address(page));
> + page_kasan_tag_reset(page);
> + set_bit(PG_mte_tagged, &page->flags);
> +}
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 0a48191534ff..a27c77dbe91c 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -46,9 +46,13 @@
> #endif
>
> #ifdef CONFIG_KASAN_HW_TAGS
> -#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
> +#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
> #else
> -#define TCR_KASAN_HW_FLAGS 0
> +/*
> + * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
> + * TBI being enabled at EL1.
> + */
> +#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
> #endif
>
> /*
> @@ -452,7 +456,7 @@ SYM_FUNC_START(__cpu_setup)
> msr_s SYS_TFSRE0_EL1, xzr
>
> /* set the TCR_EL1 bits */
> - mov_q x10, TCR_KASAN_HW_FLAGS
> + mov_q x10, TCR_MTE_FLAGS
> orr tcr, tcr, x10
> 1:
> #endif
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 11da8af06704..68ba237365dc 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -53,8 +53,9 @@ struct vm_area_struct;
> #define ___GFP_HARDWALL 0x100000u
> #define ___GFP_THISNODE 0x200000u
> #define ___GFP_ACCOUNT 0x400000u
> +#define ___GFP_ZEROTAGS 0x800000u
> #ifdef CONFIG_LOCKDEP
> -#define ___GFP_NOLOCKDEP 0x800000u
> +#define ___GFP_NOLOCKDEP 0x1000000u
> #else
> #define ___GFP_NOLOCKDEP 0
> #endif
> @@ -229,16 +230,20 @@ struct vm_area_struct;
> * %__GFP_COMP address compound page metadata.
> *
> * %__GFP_ZERO returns a zeroed page on success.
> + *
> + * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
> + * __GFP_ZERO is set.
> */
> #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
> #define __GFP_COMP ((__force gfp_t)___GFP_COMP)
> #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
> +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
>
> /* Disable lockdep for GFP context tracking */
> #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
>
> /* Room for N __GFP_FOO bits */
> -#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
> +#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
> #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
>
> /**
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index 832b49b50c7b..caaa62e1dd24 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -204,6 +204,14 @@ static inline void clear_highpage(struct page *page)
> kunmap_atomic(kaddr);
> }
>
> +#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
> +
> +static inline void tag_clear_highpage(struct page *page)
> +{
> +}
> +
> +#endif
> +
> /*
> * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
> * If we pass in a head page, we can zero up to the size of the compound page.
> diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> index 45e552cb9172..34362c8d0955 100644
> --- a/mm/kasan/hw_tags.c
> +++ b/mm/kasan/hw_tags.c
> @@ -242,7 +242,14 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
> {
> bool init = !want_init_on_free() && want_init_on_alloc(flags);
>
> - kasan_unpoison_pages(page, order, init);
> + if (flags & __GFP_ZEROTAGS) {
> + int i;
> +
> + for (i = 0; i != 1 << order; ++i)
> + tag_clear_highpage(page + i);
> + } else {
> + kasan_unpoison_pages(page, order, init);
> + }
> }
>
> void kasan_free_pages(struct page *page, unsigned int order)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6e82a7f6fd6f..24e6f668ef73 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1219,10 +1219,16 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
> return ret;
> }
>
> -static void kernel_init_free_pages(struct page *page, int numpages)
> +static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
> {
> int i;
>
> + if (zero_tags) {
> + for (i = 0; i < numpages; i++)
> + tag_clear_highpage(page + i);
> + return;
> + }
> +
> /* s390's use of memset() could override KASAN redzones. */
> kasan_disable_current();
> for (i = 0; i < numpages; i++) {
> @@ -1314,7 +1320,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
> bool init = want_init_on_free();
>
> if (init)
> - kernel_init_free_pages(page, 1 << order);
> + kernel_init_free_pages(page, 1 << order, false);
> if (!skip_kasan_poison)
> kasan_poison_pages(page, order, init);
> }
> @@ -2350,7 +2356,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
>
> kasan_unpoison_pages(page, order, init);
> if (init)
> - kernel_init_free_pages(page, 1 << order);
> + kernel_init_free_pages(page, 1 << order,
> + gfp_flags & __GFP_ZEROTAGS);
> }
>
> set_page_owner(page, order, gfp_flags);
> --
> 2.31.1.607.g51e8a6a459-goog
>
For KASAN parts:
Reviewed-by: Andrey Konovalov <andreyknvl at gmail.com>
More information about the linux-arm-kernel mailing list