[PATCH v10 4/5] drivers/perf: add DesignWare PCIe PMU driver

Ilkka Koskinen ilkka at os.amperecomputing.com
Wed Nov 15 19:50:04 PST 2023


Hi Shuai,

I have a few comments below


On Sat, 4 Nov 2023, Shuai Xue wrote:
> This commit adds the PCIe Performance Monitoring Unit (PMU) driver support
> for T-Head Yitian SoC chip. Yitian is based on the Synopsys PCI Express
> Core controller IP which provides statistics feature. The PMU is a PCIe
> configuration space register block provided by each PCIe Root Port in a
> Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error
> injection, and Statistics).
>
> To facilitate collection of statistics the controller provides the
> following two features for each Root Port:
>
> - one 64-bit counter for Time Based Analysis (RX/TX data throughput and
>  time spent in each low-power LTSSM state) and
> - one 32-bit counter for Event Counting (error and non-error events for
>  a specified lane)
>
> Note: There is no interrupt for counter overflow.
>
> This driver adds PMU devices for each PCIe Root Port. And the PMU device is
> named based on the BDF of the Root Port. For example,
>
>    30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
>
> the PMU device name for this Root Port is dwc_rootport_3018.
>
> Example usage of counting PCIe RX TLP data payload (Units of bytes)::
>
>    $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
>
> The average RX bandwidth can be calculated like this:
>
>    PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
>
> Signed-off-by: Shuai Xue <xueshuai at linux.alibaba.com>
> Reviewed-by: Baolin Wang <baolin.wang at linux.alibaba.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron at huawei.com>
> Reviewed-by: Yicong Yang <yangyicong at hisilicon.com>
> ---
> drivers/perf/Kconfig        |   7 +
> drivers/perf/Makefile       |   1 +
> drivers/perf/dwc_pcie_pmu.c | 798 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 806 insertions(+)
> create mode 100644 drivers/perf/dwc_pcie_pmu.c

...

> diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
> new file mode 100644
> index 000000000000..9485c41de322
> --- /dev/null
> +++ b/drivers/perf/dwc_pcie_pmu.c
> @@ -0,0 +1,798 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Synopsys DesignWare PCIe PMU driver
> + *
> + * Copyright (C) 2021-2023 Alibaba Inc.
> + */
> +

...

> +static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
> +					  bool enable)
> +{
> +	struct pci_dev *pdev = pcie_pmu->pdev;
> +	u16 ras_des_offset = pcie_pmu->ras_des_offset;
> +
> +	if (enable)
> +		pci_clear_and_set_dword(pdev,
> +			ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
> +			DWC_PCIE_TIME_BASED_TIMER_START, 0x1);
> +	else
> +		pci_clear_and_set_dword(pdev,
> +			ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
> +			DWC_PCIE_TIME_BASED_TIMER_START, 0x0);

It's a matter of taste, but you could simply do:

     pci_clear_and_set_dword(pdev,
                  ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
                  DWC_PCIE_TIME_BASED_TIMER_START, enable);


However, I'm fine with either way.

> +static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
> +{
> +	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
> +	struct pci_dev *pdev = pcie_pmu->pdev;
> +	u16 ras_des_offset = pcie_pmu->ras_des_offset;
> +	u32 val;
> +
> +	pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
> +
> +	return val;
> +}

...

> +static int dwc_pcie_register_dev(struct pci_dev *pdev)
> +{
> +	struct platform_device *plat_dev;
> +	struct dwc_pcie_dev_info *dev_info;
> +	int ret;
> +	u32 bdf;
> +
> +	bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
> +	plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf,
> +						 pdev, sizeof(*pdev));
> +	ret = PTR_ERR_OR_ZERO(plat_dev);
> +	if (ret)
> +             return ret;

platform_device_register_data() doesn't return a null pointer and you
don't really need 'ret'. You could do something like this instead:

    if (IS_ERR(plat_dev))
           return PTR_ERR(plat_dev);

> +	dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
> +	if (!dev_info)
> +		return -ENOMEM;
> +
> +	/* Cache platform device to handle pci device hotplug */
> +	dev_info->plat_dev = plat_dev;
> +	dev_info->pdev = pdev;
> +	list_add(&dev_info->dev_node, &dwc_pcie_dev_info_head);
> +
> +	return 0;
> +}
> +
> +static int dwc_pcie_pmu_notifier(struct notifier_block *nb,
> +				     unsigned long action, void *data)
> +{
> +	struct device *dev = data;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct dwc_pcie_dev_info *dev_info;
> +
> +	switch (action) {
> +	case BUS_NOTIFY_ADD_DEVICE:
> +		if (!dwc_pcie_match_des_cap(pdev))
> +			return NOTIFY_DONE;
> +		if (dwc_pcie_register_dev(pdev))
> +			return NOTIFY_BAD;
> +		break;
> +	case BUS_NOTIFY_DEL_DEVICE:
> +		dev_info = dwc_pcie_find_dev_info(pdev);
> +		if (!dev_info)
> +			return NOTIFY_DONE;
> +		dwc_pcie_unregister_dev(dev_info);
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block dwc_pcie_pmu_nb = {
> +	.notifier_call = dwc_pcie_pmu_notifier,
> +};
> +
> +static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
> +{
> +	struct pci_dev *pdev = plat_dev->dev.platform_data;
> +	struct dwc_pcie_pmu *pcie_pmu;
> +	char *name;
> +	u32 bdf, val;
> +	u16 vsec;
> +	int ret;
> +
> +	vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
> +					DWC_PCIE_VSEC_RAS_DES_ID);

You nicely changed to use the vendor list in this version, but here the driver
still tries to find the Alibaba-specific capability. I guess you could search
again using the vendor list. The other option would be to make
dwc_pcie_match_des_cap() return the vendor ID and pass it to
dwc_pcie_register_dev(), which would add it to the device's platform data
along with the pointer to the PCI device.

Cheers, Ilkka


> +	pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
> +	bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
> +	name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf);
> +	if (!name)
> +		return -ENOMEM;
> +
> +	pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
> +	if (!pcie_pmu)
> +		return -ENOMEM;
> +
> +	pcie_pmu->pdev = pdev;
> +	pcie_pmu->ras_des_offset = vsec;
> +	pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
> +	pcie_pmu->on_cpu = -1;
> +	pcie_pmu->pmu = (struct pmu){
> +		.name		= name,
> +		.parent		= &pdev->dev,
> +		.module		= THIS_MODULE,
> +		.attr_groups	= dwc_pcie_attr_groups,
> +		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
> +		.task_ctx_nr	= perf_invalid_context,
> +		.event_init	= dwc_pcie_pmu_event_init,
> +		.add		= dwc_pcie_pmu_event_add,
> +		.del		= dwc_pcie_pmu_event_del,
> +		.start		= dwc_pcie_pmu_event_start,
> +		.stop		= dwc_pcie_pmu_event_stop,
> +		.read		= dwc_pcie_pmu_event_update,
> +	};
> +
> +	/* Add this instance to the list used by the offline callback */
> +	ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
> +				       &pcie_pmu->cpuhp_node);
> +	if (ret) {
> +		pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf);
> +		return ret;
> +	}
> +
> +	/* Unwind when platform driver removes */
> +	ret = devm_add_action_or_reset(&plat_dev->dev,
> +				       dwc_pcie_pmu_remove_cpuhp_instance,
> +				       &pcie_pmu->cpuhp_node);
> +	if (ret)
> +		return ret;
> +
> +	ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
> +	if (ret) {
> +		pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf);
> +		return ret;
> +	}
> +	ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu,
> +				       pcie_pmu);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}




More information about the linux-arm-kernel mailing list