[PATCH 4/4] arm64: mte: Lock a page for MTE tag initialisation
Peter Collingbourne
pcc at google.com
Fri Jul 8 16:11:59 PDT 2022
On Tue, Jul 5, 2022 at 7:26 AM Catalin Marinas <catalin.marinas at arm.com> wrote:
>
> Initialising the tags and setting PG_mte_tagged flag for a page can race
> between multiple set_pte_at() on shared pages or setting the stage 2 pte
> via user_mem_abort(). Introduce a new PG_mte_lock flag as PG_arch_3 and
> set it before attempting page initialisation. Given that PG_mte_tagged
> is never cleared for a page, consider setting this flag to mean page
> unlocked and wait on this bit with acquire semantics if the page is
> locked:
>
> - try_page_mte_tagging() - lock the page for tagging, return true if it
> can be tagged, false if already tagged. No acquire semantics if it
> returns true (PG_mte_tagged not set) as there is no serialisation with
> a previous set_page_mte_tagged().
>
> - set_page_mte_tagged() - set PG_mte_tagged with release semantics.
>
> The two-bit locking is based on Peter Colingbourne's idea.
nit: Collingbourne (two l's).
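To check my understanding of the scheme, the caller pattern this
establishes looks roughly like the following (a minimal sketch based on
the description above, not verbatim kernel code):

	if (try_page_mte_tagging(page)) {
		/* we won the race: initialise the tags... */
		mte_clear_page_tags(page_address(page));
		/* ...and only then publish them (release semantics) */
		set_page_mte_tagged(page);
	}
	/*
	 * Otherwise the page is already tagged and try_page_mte_tagging()
	 * waited for PG_mte_tagged with acquire semantics, so the tags are
	 * guaranteed to be visible from here on.
	 */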
>
> Signed-off-by: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Steven Price <steven.price at arm.com>
> Cc: Peter Collingbourne <pcc at google.com>
> ---
> arch/arm64/include/asm/mte.h | 32 ++++++++++++++++++++++++++++++++
> arch/arm64/include/asm/pgtable.h | 1 +
> arch/arm64/kernel/cpufeature.c | 2 +-
> arch/arm64/kernel/mte.c | 7 +++++--
> arch/arm64/kvm/guest.c | 16 ++++++++++------
> arch/arm64/kvm/mmu.c | 2 +-
> arch/arm64/mm/copypage.c | 2 ++
> arch/arm64/mm/fault.c | 2 ++
> arch/arm64/mm/mteswap.c | 3 +++
> 9 files changed, 57 insertions(+), 10 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index c69218c56980..29712fc9df8c 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -36,6 +36,8 @@ void mte_free_tag_storage(char *storage);
>
> /* track which pages have valid allocation tags */
> #define PG_mte_tagged PG_arch_2
> +/* simple lock to avoid multiple threads tagging the same page */
> +#define PG_mte_lock PG_arch_3
>
> static inline void set_page_mte_tagged(struct page *page)
> {
> @@ -60,6 +62,32 @@ static inline bool page_mte_tagged(struct page *page)
> return ret;
> }
>
> +/*
> + * Lock the page for tagging and return 'true' if the page can be tagged,
> + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
> + * locking only happens once for page initialisation.
> + *
> + * The page MTE lock state:
> + *
> + * Locked: PG_mte_lock && !PG_mte_tagged
> + * Unlocked: !PG_mte_lock || PG_mte_tagged
> + *
> + * Acquire semantics only if the page is tagged (returning 'false').
> + */
> +static inline bool try_page_mte_tagging(struct page *page)
> +{
> + if (!test_and_set_bit(PG_mte_lock, &page->flags))
> + return !page_mte_tagged(page);
Since all callers of set_page_mte_tagged() are now dominated by a call
to try_page_mte_tagging(), and PG_mte_lock is never cleared, I don't
think we can end up in the state where !PG_mte_lock && PG_mte_tagged.
So I think this can be simplified to "return true;". I can still boot
VMs with MTE enabled after making my suggested change.
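Concretely, something like this (a sketch of what I mean):

	if (!test_and_set_bit(PG_mte_lock, &page->flags))
		return true;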
> +
> + /*
> + * The tags are being initialised, wait for the PG_mte_tagged flag to
I think at this point the tags are either being initialized or have
already been initialized, so the comment isn't quite right.
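Maybe something along these lines instead (just a wording suggestion):

	/*
	 * The tags are either being initialised by another thread or have
	 * already been initialised: wait for the PG_mte_tagged flag to be
	 * set.
	 */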
Peter
> + * be set.
> + */
> + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
> +
> + return false;
> +}
> +
> void mte_zero_clear_page_tags(void *addr);
> void mte_sync_tags(pte_t old_pte, pte_t pte);
> void mte_copy_page_tags(void *kto, const void *kfrom);
> @@ -84,6 +112,10 @@ static inline bool page_mte_tagged(struct page *page)
> {
> return false;
> }
> +static inline bool try_page_mte_tagging(struct page *page)
> +{
> + return false;
> +}
> static inline void mte_zero_clear_page_tags(void *addr)
> {
> }
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 08823669db0a..ce2dc72f64f4 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -1033,6 +1033,7 @@ static inline void arch_swap_invalidate_area(int type)
> #define __HAVE_ARCH_SWAP_RESTORE
> static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
> {
> + /* mte_restore_tags() takes the PG_mte_lock */
> if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
> set_page_mte_tagged(&folio->page);
> }
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 4478e5774580..c07dd7916517 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1964,7 +1964,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
> * Clear the tags in the zero page. This needs to be done via the
> * linear map which has the Tagged attribute.
> */
> - if (!page_mte_tagged(ZERO_PAGE(0))) {
> + if (try_page_mte_tagging(ZERO_PAGE(0))) {
> mte_clear_page_tags(lm_alias(empty_zero_page));
> set_page_mte_tagged(ZERO_PAGE(0));
> }
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 67e82ce4c285..54d284a1e0a7 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -41,6 +41,7 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
> if (check_swap && is_swap_pte(old_pte)) {
> swp_entry_t entry = pte_to_swp_entry(old_pte);
>
> + /* mte_restore_tags() takes the PG_mte_lock */
> if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) {
> set_page_mte_tagged(page);
> return;
> @@ -59,8 +60,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
> * the new page->flags are visible before the tags were updated.
> */
> smp_wmb();
> - mte_clear_page_tags(page_address(page));
> - set_page_mte_tagged(page);
> + if (try_page_mte_tagging(page)) {
> + mte_clear_page_tags(page_address(page));
> + set_page_mte_tagged(page);
> + }
> }
>
> void mte_sync_tags(pte_t old_pte, pte_t pte)
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 3b04e69006b4..059b38e7a9e8 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -1067,15 +1067,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
> clear_user(tags, MTE_GRANULES_PER_PAGE);
> kvm_release_pfn_clean(pfn);
> } else {
> + /*
> + * Only locking to serialise with a concurrent
> + * set_pte_at() in the VMM but still overriding the
> + * tags, hence ignoring the return value.
> + */
> + try_page_mte_tagging(page);
> num_tags = mte_copy_tags_from_user(maddr, tags,
> MTE_GRANULES_PER_PAGE);
>
> - /*
> - * Set the flag after checking the write
> - * completed fully
> - */
> - if (num_tags == MTE_GRANULES_PER_PAGE)
> - set_page_mte_tagged(page);
> + /* uaccess failed, don't leave stale tags */
> + if (num_tags != MTE_GRANULES_PER_PAGE)
> + mte_clear_page_tags(page);
> + set_page_mte_tagged(page);
>
> kvm_release_pfn_dirty(pfn);
> }
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 35850f17ae08..fdd46089f260 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1066,7 +1066,7 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
> return;
>
> for (i = 0; i < nr_pages; i++, page++) {
> - if (!page_mte_tagged(page)) {
> + if (try_page_mte_tagging(page)) {
> mte_clear_page_tags(page_address(page));
> set_page_mte_tagged(page);
> }
> diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
> index f36d796f1bce..9c73bc020894 100644
> --- a/arch/arm64/mm/copypage.c
> +++ b/arch/arm64/mm/copypage.c
> @@ -31,6 +31,8 @@ void copy_highpage(struct page *to, struct page *from)
> * the new page->flags are visible before the tags were updated.
> */
> smp_wmb();
> + /* It's a new page, shouldn't have been tagged yet */
> + WARN_ON_ONCE(!try_page_mte_tagging(to));
> mte_copy_page_tags(kto, kfrom);
> set_page_mte_tagged(to);
> }
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 147fe28d3fbe..bd28d6bd9286 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -926,6 +926,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
>
> void tag_clear_highpage(struct page *page)
> {
> + /* Newly allocated page, shouldn't have been tagged yet */
> + WARN_ON_ONCE(!try_page_mte_tagging(page));
> mte_zero_clear_page_tags(page_address(page));
> page_kasan_tag_reset(page);
> set_page_mte_tagged(page);
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index 9d3a8cf388fc..aec76a4423e9 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -62,6 +62,9 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page)
> * the new page->flags are visible before the tags were updated.
> */
> smp_wmb();
> + /* racing tag restoring? */
> + if (!try_page_mte_tagging(page))
> + return false;
> mte_restore_page_tags(page_address(page), tags);
>
> return true;