[PATCH v2] arm64: mte: allow async MTE to be upgraded to sync on a per-CPU basis

Catalin Marinas catalin.marinas at arm.com
Fri Jun 11 07:49:15 PDT 2021


On Wed, Jun 09, 2021 at 07:12:29PM -0700, Peter Collingbourne wrote:
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 9df3feeee890..545ef900e7ce 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -159,6 +159,7 @@ struct thread_struct {
>  #define SCTLR_USER_MASK                                                        \
>  	(SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB |   \
>  	 SCTLR_EL1_TCF0_MASK)
> +#define SCTLR_USER_DYNAMIC_TCF (1ULL << 63)

Even if you called it "USER", it still gives the impression that it's
some real hardware bit. Who knows, in a few years time it may be
allocated to a real feature.

I also don't think this logic should be added to processor.[ch]; just
keep it within mte.c.

So while it's convenient to add something to this field, given that it's
shared with ptrauth, it's pretty fragile long term. I'd add the
information about the dynamic mode to a different field. We could rename
gcr_user_excl to mte_ctrl or something and store a few bits in there in
addition to GCR_EL1.Excl (with corresponding masks etc.)

> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 125a10e413e9..f244bc96464c 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
[...]
> @@ -283,6 +288,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
>  		return -EINVAL;
>  	}
>  
> +	if (arg & PR_MTE_DYNAMIC_TCF)
> +		sctlr |= SCTLR_USER_DYNAMIC_TCF;

Please move this to thread.mte_ctrl and have mte_thread_switch() update
thread.sctlr_user (which would be set in hardware subsequently in
__switch_to()).

[...]
> @@ -453,3 +464,60 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
>  
>  	return ret;
>  }
> +
> +static ssize_t mte_upgrade_async_show(struct device *dev,
> +				      struct device_attribute *attr, char *buf)
> +{
> +	u32 val = per_cpu(mte_upgrade_async, dev->id) >> SCTLR_EL1_TCF0_SHIFT;
> +
> +	return sysfs_emit(buf, "%u\n", val);
> +}
> +
> +static int sync_sctlr(void *arg)
> +{
> +	update_sctlr_el1(current->thread.sctlr_user);
> +	return 0;
> +}

This should update thread.sctlr_user based on thread.mte_ctrl before
calling update_sctlr_el1() (i.e. keep the logic to this file).

> +
> +static ssize_t mte_upgrade_async_store(struct device *dev,
> +				       struct device_attribute *attr,
> +				       const char *buf, size_t count)
> +{
> +	ssize_t ret;
> +	u32 val;
> +	u64 tcf;
> +
> +	ret = kstrtou32(buf, 0, &val);
> +	if (ret < 0)
> +		return ret;
> +
> +	tcf = ((u64)val) << SCTLR_EL1_TCF0_SHIFT;
> +	if (tcf != SCTLR_EL1_TCF0_NONE && tcf != SCTLR_EL1_TCF0_SYNC &&
> +	    tcf != SCTLR_EL1_TCF0_ASYNC)
> +		return -EINVAL;
> +
> +	device_lock(dev);
> +	per_cpu(mte_upgrade_async, dev->id) = tcf;
> +
> +	ret = stop_machine(sync_sctlr, 0, cpumask_of(dev->id));

Do we really need a stop_machine() here? That's really heavy and we
don't need such synchronisation. An smp_call_function() should do, or
just leave it until the next context switch on the corresponding CPUs.

[...]
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index b4bb67f17a2c..27a12c53529d 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -527,8 +527,19 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
>  	write_sysreg(val, cntkctl_el1);
>  }
>  
> -static void update_sctlr_el1(u64 sctlr)
> +DECLARE_PER_CPU_READ_MOSTLY(u64, mte_upgrade_async);
> +
> +void update_sctlr_el1(u64 sctlr)
>  {
> +	if (sctlr & SCTLR_USER_DYNAMIC_TCF) {
> +		sctlr &= ~SCTLR_USER_DYNAMIC_TCF;
> +
> +		if ((sctlr & SCTLR_EL1_TCF0_MASK) == SCTLR_EL1_TCF0_ASYNC) {
> +			sctlr &= ~SCTLR_EL1_TCF0_MASK;
> +			sctlr |= __this_cpu_read(mte_upgrade_async);
> +		}
> +	}

As per my comments above, please move this logic to mte.c.

[...]
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index dcd7041b2b07..20c1503c8cdd 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -56,6 +56,8 @@
>  
>  DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
>  EXPORT_PER_CPU_SYMBOL(cpu_number);
> +DEFINE_PER_CPU_READ_MOSTLY(u64, mte_upgrade_async);
> +EXPORT_PER_CPU_SYMBOL(mte_upgrade_async);
>  
>  /*
>   * as from 2.5, kernels no longer have an init_tasks structure
> @@ -649,6 +651,27 @@ static void __init acpi_parse_and_init_cpus(void)
>  #define acpi_parse_and_init_cpus(...)	do { } while (0)
>  #endif
>  
> +/*
> + * Store the default values for per-CPU properties typically read from DT or
> + * ACPI to per-CPU variables.
> + */
> +static void __init set_default_cpu_properties(int cpu)
> +{
> +	per_cpu(mte_upgrade_async, cpu) = SCTLR_EL1_TCF0_ASYNC;
> +}
> +
> +/*
> + * Read per-CPU properties from the device tree and store them in per-CPU
> + * variables for efficient access later.
> + */
> +static void __init of_read_cpu_properties(int cpu, struct device_node *dn)
> +{
> +	u32 prop;
> +
> +	if (of_property_read_u32(dn, "mte-upgrade-async", &prop) == 0)
> +		per_cpu(mte_upgrade_async, cpu) = ((u64)prop) << SCTLR_EL1_TCF0_SHIFT;
> +}

I don't think we should introduce the DT description at all, at least
not at this stage. I'm not entirely convinced async == sync in terms of
performance on certain CPU/SoC implementations. There's probably some
small difference between them, depending on the benchmark. We don't have
a concept of what an acceptable margin is, so baking it into the DT
doesn't make sense.

Do mobile vendors normally have some of their own software running on
the SoCs? They could use some startup code to program the sysfs
interface, maybe based on the MIDR.

>  static const char *ipi_types[NR_IPI] __tracepoint_string = {
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 18a9f59dc067..4dab44732814 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -242,6 +242,8 @@ struct prctl_mm_map {
>  /* MTE tag inclusion mask */
>  # define PR_MTE_TAG_SHIFT		3
>  # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
> +/* Enable dynamic upgrading of MTE tag check fault mode */
> +# define PR_MTE_DYNAMIC_TCF		(1UL << 19)

Once we've agreed on the implementation, please add some documentation
on this new control, as we did with the others.

-- 
Catalin



More information about the linux-arm-kernel mailing list