[PATCH] drivers/perf: riscv_pmu_sbi: add perf_user_access sysctl

Palmer Dabbelt palmer at dabbelt.com
Thu Oct 6 19:26:54 PDT 2022


On Fri, 26 Aug 2022 08:15:56 PDT (-0700), heiko at sntech.de wrote:
> Add a sysctl similar to the one on arm64 to enable/disable
> access to counter CSRs from u-mode on RISC-V.
>
> The default is of course set to disabled keeping the current
> state of access - to only the TIME CSR.

Sorry for being slow on this one, but IMO this is the wrong way to go: 
this was pretty clearly described by the PDFs as a non-optional 
instruction when we committed to uABI stability, and there's userspace 
binaries that use these instructions.  I know the ISA folks changed 
their minds about these being in the base, but that doesn't mean we can 
break userspace.

If you're worried about a side channel from the high resolution timers 
that makes sense, but we can sort that out without breaking userspace: 
we just trap the counter accesses and handle them in the kernel with 
less precision.  We'll need to do that in the long run anyway, as 
there's no way to make sure these are implement.  That all applies to 
the time counter as well.

I'd also argue this should be a prctl(), as that'll allow users to flip 
it on/off for specific processes.  We can make it sticky to deal with 
the side channels, but at least starting with a per-process flag will 
let us avoid breaking compatibility for everyone.

> Signed-off-by: Heiko Stuebner <heiko at sntech.de>
> ---
>  Documentation/admin-guide/sysctl/kernel.rst |  6 +--
>  drivers/perf/riscv_pmu_sbi.c                | 43 ++++++++++++++++++++-
>  2 files changed, 44 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
> index ee6572b1edad..efd4bc385e7a 100644
> --- a/Documentation/admin-guide/sysctl/kernel.rst
> +++ b/Documentation/admin-guide/sysctl/kernel.rst
> @@ -894,15 +894,15 @@ enabled, otherwise writing to this file will return ``-EBUSY``.
>  The default value is 8.
>
>
> -perf_user_access (arm64 only)
> -=================================
> +perf_user_access (arm64 and riscv only)
> +=======================================
>
>  Controls user space access for reading perf event counters. When set to 1,
>  user space can read performance monitor counter registers directly.
>
>  The default value is 0 (access disabled).
>
> -See Documentation/arm64/perf.rst for more information.
> +See Documentation/arm64/perf.rst for more information on arm64
>
>
>  pid_max
> diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
> index 6f6681bbfd36..7aab8d673357 100644
> --- a/drivers/perf/riscv_pmu_sbi.c
> +++ b/drivers/perf/riscv_pmu_sbi.c
> @@ -41,6 +41,8 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
>  	NULL,
>  };
>
> +static int sysctl_perf_user_access __read_mostly;
> +
>  /*
>   * RISC-V doesn't have hetergenous harts yet. This need to be part of
>   * per_cpu in case of harts with different pmu counters
> @@ -640,13 +642,22 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  	return IRQ_HANDLED;
>  }
>
> +/*
> + * Depending on the perf_user_access setting, enable the access
> + * from usermode either for all counters or for TIME csr only.
> + */
> +static void riscv_pmu_update_user_access(void *info)
> +{
> +	csr_write(CSR_SCOUNTEREN, sysctl_perf_user_access ? GENMASK(31, 0) :
> +							    0x2);
> +}
> +
>  static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
>  {
>  	struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
>  	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
>
> -	/* Enable the access for TIME csr only from the user mode now */
> -	csr_write(CSR_SCOUNTEREN, 0x2);
> +	riscv_pmu_update_user_access(NULL);
>
>  	/* Stop all the counters so that they can be enabled from perf */
>  	pmu_sbi_stop_all(pmu);
> @@ -785,6 +796,32 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu)
>  	cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
>  }
>
> +static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
> +			int write, void *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> +
> +	if (ret || !write)
> +		return ret;
> +
> +	on_each_cpu(riscv_pmu_update_user_access, NULL, 1);
> +
> +	return 0;
> +}
> +
> +static struct ctl_table sbi_pmu_sysctl_table[] = {
> +	{
> +		.procname       = "perf_user_access",
> +		.data		= &sysctl_perf_user_access,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode           = 0644,
> +		.proc_handler	= riscv_pmu_proc_user_access_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{ }
> +};
> +
>  static int pmu_sbi_device_probe(struct platform_device *pdev)
>  {
>  	struct riscv_pmu *pmu = NULL;
> @@ -834,6 +871,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto out_unregister;
>
> +	register_sysctl("kernel", sbi_pmu_sysctl_table);
> +
>  	return 0;
>
>  out_unregister:



More information about the linux-riscv mailing list