[kvm-riscv/for-next 2/2] drivers/perf: riscv: Fix RV32 snapshot overflow use case

Samuel Holland samuel.holland at sifive.com
Thu Apr 25 13:18:04 PDT 2024


On 2024-04-25 6:29 PM, Atish Patra wrote:
> The shadow copy algorithm is implemented incorrectly. This patch fixes
> the behavior by keeping a per cpu shadow copy of the counter values to
> avoid clobbering for the cases where the system has more than XLEN
> counters and the overflown counter index is beyond XLEN. This issue can
> only be observed in RV32 if an SBI implementation assigns logical counter
> ids greater than XLEN or firmware counter overflow is supported in the
> future.
> 
> Fixes : commit 22f5dac41004d ("drivers/perf: riscv: Implement SBI PMU snapshot function")

Same comment as for patch 1. The logic looks correct as far as I can tell, so:

Reviewed-by: Samuel Holland <samuel.holland at sifive.com>

One minor comment below.

> Signed-off-by: Atish Patra <atishp at rivosinc.com>
> ---
>  drivers/perf/riscv_pmu_sbi.c   | 49 +++++++++++++++++++---------------
>  include/linux/perf/riscv_pmu.h |  2 ++
>  2 files changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
> index 2694110f1cff..98aaeb13e9db 100644
> --- a/drivers/perf/riscv_pmu_sbi.c
> +++ b/drivers/perf/riscv_pmu_sbi.c
> @@ -588,6 +588,7 @@ static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
>  		return sbi_err_map_linux_errno(ret.error);
>  	}
>  
> +	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
>  	cpu_hw_evt->snapshot_set_done = true;
>  
>  	return 0;
> @@ -605,7 +606,7 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
>  	union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
>  
>  	/* Read the value from the shared memory directly only if counter is stopped */
> -	if (sbi_pmu_snapshot_available() & (hwc->state & PERF_HES_STOPPED)) {
> +	if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
>  		val = sdata->ctr_values[idx];
>  		return val;
>  	}
> @@ -769,36 +770,38 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
>  	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
>  	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
>  	unsigned long flag = 0;
> -	int i;
> +	int i, idx;
>  	struct sbiret ret;
> -	unsigned long temp_ctr_values[64] = {0};
> -	unsigned long ctr_val, temp_ctr_overflow_mask = 0;
> +	u64 temp_ctr_overflow_mask = 0;
>  
>  	if (sbi_pmu_snapshot_available())
>  		flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
>  
> +	/* Reset the shadow copy to avoid saving/restoring any value from a previous overflow */
> +	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
> +
>  	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
>  		/* No need to check the error here as we can't do anything about the error */
>  		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
>  				cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
>  		if (!ret.error && sbi_pmu_snapshot_available()) {
>  			/* Save the counter values to avoid clobbering */
> -			temp_ctr_values[i * BITS_PER_LONG + i] = sdata->ctr_values[i];
> -			/* Save the overflow mask to avoid clobbering */
> -			if (BIT(i) & sdata->ctr_overflow_mask)
> -				temp_ctr_overflow_mask |= BIT(i + i * BITS_PER_LONG);
> +			for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG) {
> +				cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
> +							sdata->ctr_values[idx];
> +				/* Save the overflow mask to avoid clobbering */
> +				if (BIT(idx) & sdata->ctr_overflow_mask)
> +					temp_ctr_overflow_mask |= BIT(idx + i * BITS_PER_LONG);

This is equivalent to doing

  temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);

outside the for_each_set_bit() loop.

> +			}
>  		}
>  	}
>  
> -	/* Restore the counter values to the shared memory */
> +	/* Restore the counter values to the shared memory for used hw counters */
>  	if (sbi_pmu_snapshot_available()) {
> -		for (i = 0; i < 64; i++) {
> -			ctr_val = temp_ctr_values[i];
> -			if (ctr_val)
> -				sdata->ctr_values[i] = ctr_val;
> -			if (temp_ctr_overflow_mask)
> -				sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
> -		}
> +		for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
> +			sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
> +		if (temp_ctr_overflow_mask)
> +			sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
>  	}
>  }
>  
> @@ -850,7 +853,7 @@ static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
>  static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
>  						   u64 ctr_ovf_mask)
>  {
> -	int idx = 0;
> +	int i, idx = 0;
>  	struct perf_event *event;
>  	unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
>  	u64 max_period, init_val = 0;
> @@ -863,7 +866,7 @@ static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_
>  			hwc = &event->hw;
>  			max_period = riscv_pmu_ctr_get_width_mask(event);
>  			init_val = local64_read(&hwc->prev_count) & max_period;
> -			sdata->ctr_values[idx] = init_val;
> +			cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
>  		}
>  		/*
>  		 * We do not need to update the non-overflow counters the previous
> @@ -871,10 +874,14 @@ static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_
>  		 */
>  	}
>  
> -	for (idx = 0; idx < BITS_TO_LONGS(RISCV_MAX_COUNTERS); idx++) {
> +	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
> +		/* Restore the counter values to relative indices for used hw counters */
> +		for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
> +			sdata->ctr_values[idx] =
> +					cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
>  		/* Start all the counters in a single shot */
>  		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
> -			  cpu_hw_evt->used_hw_ctrs[idx], flag, 0, 0, 0);
> +			  cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
>  	}
>  }
>  
> @@ -898,7 +905,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  	int lidx, hidx, fidx;
>  	struct riscv_pmu *pmu;
>  	struct perf_event *event;
> -	unsigned long overflow;
> +	u64 overflow;
>  	u64 overflowed_ctrs = 0;
>  	struct cpu_hw_events *cpu_hw_evt = dev;
>  	u64 start_clock = sched_clock();
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index c3fa90970042..701974639ff2 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -45,6 +45,8 @@ struct cpu_hw_events {
>  	phys_addr_t snapshot_addr_phys;
>  	/* Boolean flag to indicate setup is already done */
>  	bool snapshot_set_done;
> +	/* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
> +	u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
>  };
>  
>  struct riscv_pmu {




More information about the linux-riscv mailing list