Re: [PATCH v9 4/5] perf: RISC-V: add support for SSE event

yang.susheng at zte.com.cn yang.susheng at zte.com.cn
Wed Jun 10 03:40:51 PDT 2026


Hi Zhanpeng,

> 
> In order to use SSE within PMU drivers, register an SSE handler for the
> local PMU event. Reuse the existing overflow IRQ handler and pass
> appropriate pt_regs. Add a config option RISCV_PMU_SBI_SSE to select event
> delivery via SSE events.
> 
> When the SSE path is used, also honor the return value from
> perf_event_overflow(). If perf core throttles or disables an event, do not
> immediately restart the overflowed counters from the SSE handler.
> 
> Signed-off-by: Clément Léger <cleger at rivosinc.com>
> Co-developed-by: Zhanpeng Zhang <zhangzhanpeng.jasper at bytedance.com>
> Signed-off-by: Zhanpeng Zhang <zhangzhanpeng.jasper at bytedance.com>
> ---
>  drivers/perf/Kconfig           | 10 +++++
>  drivers/perf/riscv_pmu.c       | 23 +++++++++++
>  drivers/perf/riscv_pmu_sbi.c   | 78 ++++++++++++++++++++++++++++++++-----
>  include/linux/perf/riscv_pmu.h |  5 +++
>  4 files changed, 104 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index ab90932fc2d0..b6c58475091c 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -105,6 +105,16 @@ config RISCV_PMU_SBI
>  	  full perf feature support i.e. counter overflow, privilege mode
>  	  filtering, counter configuration.
>  
> +config RISCV_PMU_SBI_SSE
> +	depends on RISCV_PMU && RISCV_SBI_SSE
> +	bool "RISC-V PMU SSE events"
> +	default n
> +	help
> +	  Say y if you want to use SSE events to deliver PMU interrupts. This
> +	  provides a way to profile the kernel at any level by using NMI-like
> +	  SSE events. Since SSE events can be intrusive, this option allows
> +	  selecting them only when needed.
> +
>  config STARFIVE_STARLINK_PMU
>  	depends on ARCH_STARFIVE || COMPILE_TEST
>  	depends on 64BIT
> diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
> index 8e3cd0f35336..a48e286d9394 100644
> --- a/drivers/perf/riscv_pmu.c
> +++ b/drivers/perf/riscv_pmu.c
> @@ -13,6 +13,7 @@
>  #include <linux/irqdesc.h>
>  #include <linux/perf/riscv_pmu.h>
>  #include <linux/printk.h>
> +#include <linux/riscv_sbi_sse.h>
>  #include <linux/smp.h>
>  #include <linux/sched_clock.h>
>  
> @@ -254,6 +255,24 @@ void riscv_pmu_start(struct perf_event *event, int flags)
>  	perf_event_update_userpage(event);
>  }
>  
> +#ifdef CONFIG_RISCV_PMU_SBI_SSE
> +static void riscv_pmu_disable(struct pmu *pmu)
> +{
> +	struct riscv_pmu *rvpmu = to_riscv_pmu(pmu);
> +
> +	if (rvpmu->sse_evt)
> +		sse_event_disable_local(rvpmu->sse_evt);
> +}
> +
> +static void riscv_pmu_enable(struct pmu *pmu)
> +{
> +	struct riscv_pmu *rvpmu = to_riscv_pmu(pmu);
> +
> +	if (rvpmu->sse_evt)
> +		sse_event_enable_local(rvpmu->sse_evt);
> +}
> +#endif
> +
>  static int riscv_pmu_add(struct perf_event *event, int flags)
>  {
>  	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> @@ -411,6 +430,10 @@ struct riscv_pmu *riscv_pmu_alloc(void)
>  		.event_mapped	= riscv_pmu_event_mapped,
>  		.event_unmapped	= riscv_pmu_event_unmapped,
>  		.event_idx	= riscv_pmu_event_idx,
> +#ifdef CONFIG_RISCV_PMU_SBI_SSE
> +		.pmu_enable	= riscv_pmu_enable,
> +		.pmu_disable	= riscv_pmu_disable,
> +#endif
>  		.add		= riscv_pmu_add,
>  		.del		= riscv_pmu_del,
>  		.start		= riscv_pmu_start,
> diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
> index 385af5e6e6d0..ac10ebd73c7c 100644
> --- a/drivers/perf/riscv_pmu_sbi.c
> +++ b/drivers/perf/riscv_pmu_sbi.c
> @@ -17,6 +17,7 @@
>  #include <linux/irqdomain.h>
>  #include <linux/of_irq.h>
>  #include <linux/of.h>
> +#include <linux/riscv_sbi_sse.h>
>  #include <linux/cpu_pm.h>
>  #include <linux/sched/clock.h>
>  #include <linux/soc/andes/irq.h>
> @@ -1038,10 +1039,10 @@ static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
>  		pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
>  }
>  
> -static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
> +static irqreturn_t pmu_sbi_ovf_handler(struct cpu_hw_events *cpu_hw_evt,
> +				       struct pt_regs *regs, bool from_sse)
>  {
>  	struct perf_sample_data data;
> -	struct pt_regs *regs;
>  	struct hw_perf_event *hw_evt;
>  	union sbi_pmu_ctr_info *info;
>  	int lidx, hidx, fidx;
> @@ -1049,7 +1050,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  	struct perf_event *event;
> +	int ev_overflow = 0;
>  	u64 overflow;
>  	u64 overflowed_ctrs = 0;
> -	struct cpu_hw_events *cpu_hw_evt = dev;
>  	u64 start_clock = sched_clock();
>  	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
>  
> @@ -1059,13 +1059,15 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  	/* Firmware counter don't support overflow yet */
>  	fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
>  	if (fidx == RISCV_MAX_COUNTERS) {
> -		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
> +		if (!from_sse)
> +			csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
>  		return IRQ_NONE;
>  	}
>  
>  	event = cpu_hw_evt->events[fidx];
>  	if (!event) {
> -		ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
> +		if (!from_sse)
> +			ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
>  		return IRQ_NONE;
>  	}
>  
> @@ -1080,16 +1082,16 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  
>  	/*
>  	 * Overflow interrupt pending bit should only be cleared after stopping
> -	 * all the counters to avoid any race condition.
> +	 * all the counters to avoid any race condition. When using SSE,
> +	 * interrupt is cleared when stopping counters.
>  	 */
> -	ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
> +	if (!from_sse)
> +		ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
>  
>  	/* No overflow bit is set */
>  	if (!overflow)
>  		return IRQ_NONE;
>  
> -	regs = get_irq_regs();
> -
>  	for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
>  		struct perf_event *event = cpu_hw_evt->events[lidx];
>  
> @@ -1133,18 +1136,65 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  			 * TODO: We will need to stop the guest counters once
>  			 * virtualization support is added.
>  			 */
> -			perf_event_overflow(event, &data, regs);
> +			ev_overflow |= perf_event_overflow(event, &data, regs);
>  		}
>  		/* Reset the state as we are going to start the counter after the loop */
>  		hw_evt->state = 0;
>  	}
>  
> -	pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
> +	if (!ev_overflow || !from_sse)
> +		pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
> +
>  	perf_sample_event_took(sched_clock() - start_clock);
>  
>  	return IRQ_HANDLED;
>  }
>  
> +static irqreturn_t pmu_sbi_ovf_irq_handler(int irq, void *dev)
> +{
> +	return pmu_sbi_ovf_handler(dev, get_irq_regs(), false);
> +}
> +
> +#ifdef CONFIG_RISCV_PMU_SBI_SSE
> +static int pmu_sbi_ovf_sse_handler(u32 evt, void *arg, struct pt_regs *regs)
> +{
> +	struct cpu_hw_events __percpu *hw_events = arg;
> +	struct cpu_hw_events *hw_event = raw_cpu_ptr(hw_events);
> +
> +	pmu_sbi_ovf_handler(hw_event, regs, true);
> +
> +	return 0;
> +}
> +
> +static int pmu_sbi_setup_sse(struct riscv_pmu *pmu)
> +{
> +	int ret;
> +	struct sse_event *evt;
> +	struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
> +
> +	evt = sse_event_register(SBI_SSE_EVENT_LOCAL_PMU_OVERFLOW, 0,
> +				 pmu_sbi_ovf_sse_handler, hw_events);
> +	if (IS_ERR(evt))
> +		return PTR_ERR(evt);
> +
> +	ret = sse_event_enable(evt);
> +	if (ret) {
> +		sse_event_unregister(evt);
> +		return ret;
> +	}
> +
> +	pr_info("using SSE for PMU event delivery\n");
> +	pmu->sse_evt = evt;
> +
> +	return ret;
> +}
> +#else
> +static int pmu_sbi_setup_sse(struct riscv_pmu *pmu)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
> +
>  static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
>  {
>  	struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
> @@ -1195,6 +1242,10 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
>  	struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
>  	struct irq_domain *domain = NULL;
>  
> +	ret = pmu_sbi_setup_sse(pmu);
> +	if (!ret)
> +		return 0;
> +
>  	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
>  		riscv_pmu_irq_num = RV_IRQ_PMU;
>  		riscv_pmu_use_irq = true;
> @@ -1229,7 +1280,7 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
>  		return -ENODEV;
>  	}
>  
> -	ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
> +	ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_irq_handler, "riscv-pmu", hw_events);
>  	if (ret) {
>  		pr_err("registering percpu irq failed [%d]\n", ret);
>  		return ret;
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index f82a28040594..08fdcf6baf4e 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -28,6 +28,8 @@
>  
>  #define RISCV_PMU_CONFIG1_GUEST_EVENTS 0x1
>  
> +struct sse_event;
> +
>  struct cpu_hw_events {
>  	/* currently enabled events */
>  	int			n_events;
> @@ -54,6 +56,9 @@ struct riscv_pmu {
>  	char		*name;
>  
>  	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
> +#ifdef CONFIG_RISCV_PMU_SBI_SSE
> +	struct sse_event *sse_evt;
> +#endif
>  
>  	unsigned long	cmask;
>  	u64		(*ctr_read)(struct perf_event *event);
> -- 
> 2.50.1 (Apple Git-155)

We found that with RISCV_PMU_SBI_SSE enabled, running 'perf record -g
-F 999 ls' occasionally fails with a kernel Oops. With a more complex
workload, the failure reproduces 100% of the time. With
RISCV_PMU_SBI_SSE disabled, the same commands run normally, so we
suspect an issue in the PMU SBI SSE path. Have you seen this behavior?
Do you have any suggestions for troubleshooting it?
Below is the fault information from two such runs:
[root at localhost ~]# perf record -g -F 999 ls
[ 9478.867438] BUG: spinlock bad magic on CPU#2, ls/1046
[ 9478.871534] Unable to handle kernel paging request at virtual address ffffffc6004940d0
[ 9478.872804] Oops [#1]
[ 9478.873359] Modules linked in: xt_MASQUERADE xfrm_user xfrm_algo iptable_nat xt_addrtype
iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4
libcrc32c br_netfilter bridge stp llc overlay virtio_gpu virtio_dma_buf drm_shmem_helper
drm_client_lib drm_kms_helper nls_iso8859_1 drm drm_panel_orientation_quirks backlight configfs efivarfs
[ 9478.877513] CPU: 2 UID: 0 PID: 1046 Comm: ls Not tainted 6.13.0-00006-ge94d0b11d044-dirty #7
[ 9478.878544] Hardware name: QEMU QEMU Virtual Machine, BIOS 2.7 2/2/2022
[ 9478.879324] epc : string+0x48/0xe6
[ 9478.880100]  ra : vsnprintf+0xea/0x3b4
[ 9478.880697] epc : ffffffff809e39ae ra : ffffffff809e656e sp : ffffffc600493360
[ 9478.881331]  gp : ffffffff81516480 tp : ffffffd68d280000 t0 : ffffffc600493312
[ 9478.881952]  t1 : ffffffffffffffff t2 : 6e697073203a4755 s0 : ffffffc6004933a0
[ 9478.882557]  s1 : ffffffffffffffff a0 : ffffffc6004934d7 a1 : 0000000000000000
[ 9478.883355]  a2 : ffffffc6004934a8 a3 : ffffffc6004930d0 a4 : ffffffc6004940d0
[ 9478.883988]  a5 : ffffffc7004934d6 a6 : ffffffffffffe000 a7 : 0000000000000004
[ 9478.884600]  s2 : ffffffc6004934d7 s3 : ffffffc6004934a8 s4 : ffffffff80f24ea4
[ 9478.885420]  s5 : ffffffff000000ff s6 : ffffffff8100153e s7 : 0000000000ffffff
[ 9478.886056]  s8 : ffffffff8100153c s9 : 0000000000000007 s10: 0000000000000002
[ 9478.886859]  s11: ffffffc6004935f0 t3 : 0000000000000004 t4 : ffffffff80e592d8
[ 9478.887493]  t5 : ffffffff8152cb20 t6 : ffffffc60049307a
[ 9478.887946] status: 0000000200000100 badaddr: ffffffc6004940d0 cause: 000000000000000d
[ 9478.888886] [<ffffffff809e39ae>] string+0x48/0xe6
[ 9478.889342] [<ffffffff809e656e>] vsnprintf+0xea/0x3b4
[ 9478.889795] [<ffffffff8007c2ce>] vprintk_store+0x108/0x3d2
[ 9478.890459] [<ffffffff8007d0ce>] vprintk_emit+0x82/0x218
[ 9478.890932] [<ffffffff8007d27a>] vprintk_default+0x16/0x1e
[ 9478.891416] [<ffffffff8007e270>] vprintk+0x1e/0x3c
[ 9478.892011] [<ffffffff8000332a>] _printk+0x32/0x50
[ 9478.892441] [<ffffffff80002f84>] spin_dump+0x5e/0x6e
[ 9478.892865] [<ffffffff80077bf4>] do_raw_spin_unlock+0x130/0x132
[ 9478.893542] [<ffffffff809f50e0>] _raw_spin_unlock+0x10/0x22
[ 9478.894031] [<ffffffff80166118>] filemap_map_pages+0x314/0x434
[ 9478.894541] [<ffffffff8019f6ea>] __handle_mm_fault+0x9ac/0xd50
[ 9478.895556] Code: 97aa a809 7463 00c5 0023 00d5 0505 2585 0663 00f5 (4683) 0007 
[ 9478.896729] ---[ end trace 0000000000000000 ]---
[ 9478.897637] note: ls[1046] exited with irqs disabled
[ 9478.898653] note: ls[1046] exited with preempt_count 4

[root at localhost x264]# perf record -g -F 999 ./x264 -o output_static.mkv ./Kimono_1920x1080_24.yuv
--input-res 1920x1080 --fps 24 --preset faster --vbv-maxrate 2400 --vbv-bufsize 4800 --ref 1
--aq-mode 2 --aq-strength 1.2 --qcomp 0.8 --lookahead-threads 12 --ipratio 1.0 --bframes 3
--rc-lookahead 6 --crf 28 --keyint 60 --scenecut 0 --frames 200                                                                                                      
[   63.382672] Unable to handle kernel paging request at virtual address 0000003feb0ff5d8                                                               
[   63.383325] Oops [#1]                                                                                                                                
[   63.383395] Modules linked in: xt_MASQUERADE xfrm_user xfrm_algo iptable_nat xt_addrtype
iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4
libcrc32c br_netfilter bridge stp llc overlay virtio_gpu virtio_dma_buf drm_shmem_helper
drm_client_lib drm_kms_helper nls_iso8859_1 drm configfs drm_panel_orientation_quirks
backlight efivarfs                                                                          
[   63.385401] CPU: 3 UID: 0 PID: 580 Comm: x264 Not tainted 6.13.0-00006-ge94d0b11d044-dirty #7                                                
[   63.385521] Hardware name: QEMU QEMU Virtual Machine, BIOS 2.7 2/2/2022                                                                              
[   63.385732] epc : walk_stackframe+0x5c/0x11a                                                                                                         
[   63.386470]  ra : walk_stackframe+0xda/0x11a                                                                                                         
[   63.386537] epc : ffffffff80013b5c ra : ffffffff80013bda sp : ffffffc600493a40                                                                       
[   63.386579]  gp : ffffffff81516480 tp : ffffffd684b33300 t0 : ffffffd7febf9c20                                                                       
[   63.386617]  t1 : 000000003b9aca00 t2 : ffffffd7febe8080 s0 : ffffffc600493a90                                                                       
[   63.386654]  s1 : ffffffc600493ee0 a0 : 0000000000000001 a1 : ffffffff809f5a48                                                                       
[   63.386692]  a2 : 0000000000000002 a3 : 0000003feb0ff5d0 a4 : ffffffffffffc000                                                                       
[   63.386729]  a5 : 0000000000003fff a6 : ffffffff81552558 a7 : ffffffd698fb8000
[   63.386769]  s2 : 0000003feb100000 s3 : 0000003feb0ff5e0 s4 : ffffffff8001886a
[   63.386806]  s5 : ffffffc600493ac0 s6 : ffffffff809f5a48 s7 : 0000000000000000
[   63.386844]  s8 : ffffffc600493ac0 s9 : 0000000000000000 s10: 000000000007fff8
[   63.386886]  s11: ffffffd698fb8000 t3 : 0000000000000015 t4 : 00000000000003e7
[   63.386932]  t5 : 0000000000452bf5 t6 : ffffffc600493f70
[   63.386965] status: 0000000200000100 badaddr: 0000003feb0ff5d8 cause: 000000000000000d
[   63.387069] [<ffffffff80013b5c>] walk_stackframe+0x5c/0x11a
[   63.387122] [<ffffffff80018902>] perf_callchain_kernel+0x28/0x34
[   63.387154] [<ffffffff80160294>] get_perf_callchain+0x88/0x18e
[   63.387189] [<ffffffff8015bcf2>] perf_callchain+0x52/0x6e
[   63.387219] [<ffffffff8015c0f4>] perf_prepare_sample+0x3e6/0x730
[   63.387249] [<ffffffff8015c8d6>] perf_event_output_forward+0x56/0xb8
[   63.387280] [<ffffffff8015c51a>] __perf_event_overflow+0xdc/0x2e2
[   63.387310] [<ffffffff8015d954>] perf_event_overflow+0x12/0x1a
[   63.387341] [<ffffffff807e15e0>] pmu_sbi_ovf_handler+0x684/0x736
[   63.387383] [<ffffffff807e16ee>] pmu_sbi_ovf_sse_handler+0x26/0x30
[   63.387417] [<ffffffff807b73a0>] sse_handle_event+0x16/0x48
[   63.387449] [<ffffffff8001933a>] do_sse+0x76/0xa8
[   63.387478] [<ffffffff800195ac>] handle_sse+0xc0/0x162
[   63.387748] Code: 7933 00e9 6463 0539 f693 0079 e2a1 e062 8693 ff09 (bc03) 0086 
[   63.388072] ---[ end trace 0000000000000000 ]---
[   63.388547] Kernel panic - not syncing: Fatal exception in interrupt
[   63.388870] SMP: stopping secondary CPUs

Regards
Yangsusheng



More information about the kvm-riscv mailing list