[PATCH v9 3/4] arm64: mte: introduce a per-CPU tag checking mode preference

Tue Jul 13 10:48:05 PDT 2021

On Mon, Jul 12, 2021 at 11:59:39AM -0700, Peter Collingbourne wrote:
> On Wed, Jul 7, 2021 at 4:15 AM Will Deacon <will at kernel.org> wrote:
> >
> > On Fri, Jul 02, 2021 at 12:41:09PM -0700, Peter Collingbourne wrote:
> > > Add a per-CPU sysfs node, mte_tcf_preferred, that allows the preferred
> > > tag checking mode to be configured. The current possible values are
> > > async and sync.
> > >
> > > Link: https://linux-review.googlesource.com/id/I7493dcd533a2785a1437b16c3f6b50919f840854
> > > Signed-off-by: Peter Collingbourne <pcc at google.com>
> > > Reviewed-by: Catalin Marinas <catalin.marinas at arm.com>
> > > ---
> > >  arch/arm64/kernel/mte.c | 77 +++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 75 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> > > index 53d89915029d..48f218e070cc 100644
> > > --- a/arch/arm64/kernel/mte.c
> > > +++ b/arch/arm64/kernel/mte.c
> > > @@ -4,6 +4,7 @@
> > >   */
> > >
> > >  #include <linux/bitops.h>
> > > +#include <linux/cpu.h>
> > >  #include <linux/kernel.h>
> > >  #include <linux/mm.h>
> > >  #include <linux/prctl.h>
> > > @@ -26,6 +27,8 @@ u64 gcr_kernel_excl __ro_after_init;
> > >
> > >  static bool report_fault_once = true;
> > >
> > > +static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
> > > +
> > >  #ifdef CONFIG_KASAN_HW_TAGS
> > >  /* Whether the MTE asynchronous mode is enabled. */
> > >  DEFINE_STATIC_KEY_FALSE(mte_async_mode);
> > > @@ -199,17 +202,24 @@ static void update_gcr_el1_excl(u64 excl)
> > >
> > >  static void mte_update_sctlr_user(struct task_struct *task)
> > >  {
> > > +     /*
> > > +      * This can only be called on the current or next task since the CPU
> > > +      * must match where the thread is going to run.
> > > +      */
> > >       unsigned long sctlr = task->thread.sctlr_user;
> > > -     unsigned long pref = MTE_CTRL_TCF_ASYNC;
> > >       unsigned long mte_ctrl = task->thread.mte_ctrl;
> > > -     unsigned long resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
> > > +     unsigned long pref, resolved_mte_tcf;
> > >
> > > +     preempt_disable();
> > > +     pref = __this_cpu_read(mte_tcf_preferred);
> > > +     resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
> > >       sctlr &= ~SCTLR_EL1_TCF0_MASK;
> > >       if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
> > >               sctlr |= SCTLR_EL1_TCF0_ASYNC;
> > >       else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
> > >               sctlr |= SCTLR_EL1_TCF0_SYNC;
> > >       task->thread.sctlr_user = sctlr;
> > > +     preempt_enable();
> > >  }
> > >
> > >  void mte_thread_init_user(void)
> > > @@ -441,3 +451,66 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
> > >
> > >       return ret;
> > >  }
> > > +
> > > +static ssize_t mte_tcf_preferred_show(struct device *dev,
> > > +                                   struct device_attribute *attr, char *buf)
> > > +{
> > > +     switch (per_cpu(mte_tcf_preferred, dev->id)) {
> > > +     case MTE_CTRL_TCF_ASYNC:
> > > +             return sysfs_emit(buf, "async\n");
> > > +     case MTE_CTRL_TCF_SYNC:
> > > +             return sysfs_emit(buf, "sync\n");
> > > +     default:
> > > +             return sysfs_emit(buf, "???\n");
> > > +     }
> > > +}
> > > +
> > > +static void sync_sctlr(void *arg)
> > > +{
> > > +     mte_update_sctlr_user(current);
> > > +     set_task_sctlr_el1(current->thread.sctlr_user);
> > > +}
> > > +
> > > +static ssize_t mte_tcf_preferred_store(struct device *dev,
> > > +                                    struct device_attribute *attr,
> > > +                                    const char *buf, size_t count)
> > > +{
> > > +     ssize_t ret = 0;
> > > +     u64 tcf;
> > > +
> > > +     if (sysfs_streq(buf, "async"))
> > > +             tcf = MTE_CTRL_TCF_ASYNC;
> > > +     else if (sysfs_streq(buf, "sync"))
> > > +             tcf = MTE_CTRL_TCF_SYNC;
> > > +     else
> > > +             return -EINVAL;
> > > +
> > > +     device_lock(dev);
> > > +     per_cpu(mte_tcf_preferred, dev->id) = tcf;
> > > +
> > > +     if (cpu_online(dev->id))
> > > +             ret = smp_call_function_single(dev->id, sync_sctlr, NULL, 0);
> >
> > Hmm, so this call could land right in the middle of a concurrent prctl().
> > What happens in that case?
> 
> I don't think anything can go wrong. When the prctl sets mte_ctrl we
> then need to call mte_update_sctlr_user followed by set_task_sctlr_el1
> and it doesn't matter if it happens multiple times (either
> mte_update_sctlr_user/set_task_sctlr_el1/mte_update_sctlr_user/set_task_sctlr_el1
> or even the less likely
> mte_update_sctlr_user/mte_update_sctlr_user/set_task_sctlr_el1/set_task_sctlr_el1).

I think you're still (at least) relying on the compiler not to tear or cache
accesses to ->thread.sctlr_user, no? It seems like it would be a lot simpler
just to leave the change until the next context switch and not send the IPI
at all. I think that's a reasonable behaviour because the write to sysfs is
racy anyway, so we can just document this.

> Actually, I'm not sure we need any code in the sync_sctlr function at
> all. Simply scheduling an empty function onto the CPU will cause
> mte_update_sctlr_user and then update_sctlr_el1 to be called when the
> task that was originally running on the CPU gets rescheduled onto it,
> as a result of the change to mte_thread_switch.

I'm not sure I agree here -- I thought smp_call_function_single() ran the
target function directly from the IPI handler in interrupt context, without
the need for a context-switch.

Will