[PATCH v7 3/4] remoteproc: qcom: Add capability to collect minidumps

Bjorn Andersson bjorn.andersson at linaro.org
Wed Nov 18 11:49:39 EST 2020


On Tue 03 Nov 03:19 CST 2020, Siddharth Gupta wrote:

> This patch adds support for collecting minidump in the event of remoteproc
> crash. Parse the minidump table based on remoteproc's unique minidump-id,
> read all memory regions from the remoteproc's minidump table entry and
> expose the memory to userspace. The remoteproc platform driver can choose
> to collect a full/mini dump by specifying the coredump op.
> 
> Co-developed-by: Rishabh Bhatnagar <rishabhb at codeaurora.org>
> Signed-off-by: Rishabh Bhatnagar <rishabhb at codeaurora.org>
> Co-developed-by: Gurbir Arora <gurbaror at codeaurora.org>
> Signed-off-by: Gurbir Arora <gurbaror at codeaurora.org>
> Signed-off-by: Siddharth Gupta <sidgup at codeaurora.org>
> ---
>  drivers/remoteproc/qcom_minidump.h |  64 +++++++++++++++++++++++
>  drivers/remoteproc/qcom_q6v5_pas.c | 104 ++++++++++++++++++++++++++++++++++++-
>  2 files changed, 166 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/remoteproc/qcom_minidump.h
> 
> diff --git a/drivers/remoteproc/qcom_minidump.h b/drivers/remoteproc/qcom_minidump.h

These definitions only need to live in a header file if they are going
to be accessed from more than one .c file.

> new file mode 100644
> index 0000000..5857d06
> --- /dev/null
> +++ b/drivers/remoteproc/qcom_minidump.h
> @@ -0,0 +1,64 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
> + */
> +
> +#ifndef __QCOM_MINIDUMP_H
> +#define __QCOM_MINIDUMP_H
> +
> +#define MAX_NUM_OF_SS           10
> +#define MAX_REGION_NAME_LENGTH  16
> +#define SBL_MINIDUMP_SMEM_ID	602
> +#define MD_REGION_VALID		('V' << 24 | 'A' << 16 | 'L' << 8 | 'I' << 0)
> +#define MD_SS_ENCR_DONE		('D' << 24 | 'O' << 16 | 'N' << 8 | 'E' << 0)
> +#define MD_SS_ENABLED		('E' << 24 | 'N' << 16 | 'B' << 8 | 'L' << 0)
> +
> +/**
> + * struct minidump_region - Minidump region
> + * @name		: Name of the region to be dumped
> + * @seq_num:		: Use to differentiate regions with same name.
> + * @valid		: This entry to be dumped (if set to 1)
> + * @address		: Physical address of region to be dumped
> + * @size		: Size of the region
> + */
> +struct minidump_region {
> +	char	name[MAX_REGION_NAME_LENGTH];
> +	__le32	seq_num;
> +	__le32	valid;
> +	__le64	address;
> +	__le64	size;
> +};
> +
> +/**
> + * struct minidump_subsystem_toc: Subsystem's SMEM Table of content
> + * @status : Subsystem toc init status
> + * @enabled : if set to 1, this region would be copied during coredump
> + * @encryption_status: Encryption status for this subsystem
> + * @encryption_required : Decides to encrypt the subsystem regions or not
> + * @ss_region_count : Number of regions added in this subsystem toc
> + * @md_ss_smem_regions_baseptr : regions base pointer of the subsystem
> + */
> +struct minidump_subsystem_toc {
> +	__le32	status;
> +	__le32	enabled;
> +	__le32	encryption_status;
> +	__le32	encryption_required;
> +	__le32	ss_region_count;

Please drop the "ss_" prefix.

> +	__le64	md_ss_smem_regions_baseptr;

Please drop the "md_ss_smem_" prefix.

> +};
> +
> +/**
> + * struct minidump_global_toc: Global Table of Content
> + * @md_toc_init : Global Minidump init status
> + * @md_revision : Minidump revision
> + * @md_enable_status : Minidump enable status
> + * @md_ss_toc : Array of subsystems toc
> + */
> +struct minidump_global_toc {
> +	__le32				status;
> +	__le32				md_revision;
> +	__le32				enabled;
> +	struct minidump_subsystem_toc	md_ss_toc[MAX_NUM_OF_SS];

How about naming this member "subsystems", and how about dropping the
"_toc" suffix on the type?

> +};
> +
> +#endif
> diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
> index 3837f23..349f725 100644
> --- a/drivers/remoteproc/qcom_q6v5_pas.c
> +++ b/drivers/remoteproc/qcom_q6v5_pas.c
> @@ -28,11 +28,13 @@
>  #include "qcom_pil_info.h"
>  #include "qcom_q6v5.h"
>  #include "remoteproc_internal.h"
> +#include "qcom_minidump.h"
>  
>  struct adsp_data {
>  	int crash_reason_smem;
>  	const char *firmware_name;
>  	int pas_id;
> +	unsigned int minidump_id;
>  	bool has_aggre2_clk;
>  	bool auto_boot;
>  
> @@ -63,6 +65,7 @@ struct qcom_adsp {
>  	int proxy_pd_count;
>  
>  	int pas_id;
> +	unsigned int minidump_id;
>  	int crash_reason_smem;
>  	bool has_aggre2_clk;
>  	const char *info_name;
> @@ -116,6 +119,88 @@ static void adsp_pds_disable(struct qcom_adsp *adsp, struct device **pds,
>  	}
>  }
>  
> +static void adsp_minidump_cleanup(struct rproc *rproc)
> +{
> +	struct rproc_dump_segment *entry, *tmp;
> +
> +	list_for_each_entry_safe(entry, tmp, &rproc->dump_segments, node) {
> +		list_del(&entry->node);
> +		kfree(entry->priv);
> +		kfree(entry);
> +	}
> +}
> +
> +static void adsp_add_minidump_segments(struct rproc *rproc,
> +				       struct minidump_subsystem_toc *minidump_ss)
> +{
> +	struct minidump_region __iomem *ptr;
> +	struct minidump_region region;
> +	int seg_cnt, i;
> +	dma_addr_t da;
> +	size_t size;
> +	char *name;
> +
> +	if (!list_empty(&rproc->dump_segments)) {

if (WARN_ON(!list_empty(&rproc->dump_segments)))

Because this would only happen if we have a bug somewhere that leaves
items lingering on the dump_segments list.

> +		dev_err(&rproc->dev, "dump segment list already populated\n");
> +		return;
> +	}
> +
> +	seg_cnt = le32_to_cpu(minidump_ss->ss_region_count);
> +	ptr = ioremap((unsigned long)le64_to_cpu(minidump_ss->md_ss_smem_regions_baseptr),
> +		      seg_cnt * sizeof(struct minidump_region));
> +
> +	if (!ptr)
> +		return;
> +
> +	for (i = 0; i < seg_cnt; i++) {
> +		memcpy_fromio(&region, ptr + i, sizeof(region));
> +		if (region.valid == MD_REGION_VALID) {
> +			name = kmalloc(MAX_REGION_NAME_LENGTH, GFP_KERNEL);
> +			strlcpy(name, region.name, MAX_REGION_NAME_LENGTH);

Please use kstrdup() and don't forget to check for (and handle)
allocation failures.

> +			da = le64_to_cpu(region.address);
> +			size = le32_to_cpu(region.size);
> +			rproc_coredump_add_custom_segment(rproc, da, size, NULL, name);
> +		}
> +	}
> +
> +	iounmap(ptr);
> +}
> +
> +static void adsp_dump(struct rproc *rproc)

Here I think it makes sense to spell out adsp_minidump().

That said, the only thing I see specific to this driver here is the
use of adsp->minidump_id, so how about moving all this to qcom_common.c
and just calling qcom_minidump(rproc, adsp->minidump_id); from here?

That way we can easily integrate it in the other remoteprocs as needed
later.

> +{
> +	struct qcom_adsp *adsp = rproc->priv;
> +	struct minidump_subsystem_toc *minidump_ss;
> +	struct minidump_global_toc *minidump_toc;

How about just naming this "toc" and minidump_ss just "minidump"?

> +
> +	/* Get Global minidump ToC*/
> +	minidump_toc = qcom_smem_get(QCOM_SMEM_HOST_ANY, SBL_MINIDUMP_SMEM_ID, NULL);
> +
> +	/* check if global table pointer exists and init is set */
> +	if (IS_ERR(minidump_toc) || !minidump_toc->status) {
> +		dev_err(&rproc->dev, "SMEM is not initialized.\n");

"Minidump TOC not found in SMEM\n"

> +		return;
> +	}
> +
> +	/* Get subsystem table of contents using the minidump id */
> +	minidump_ss = &minidump_toc->md_ss_toc[adsp->minidump_id];
> +
> +	/**
> +	 * Collect minidump if SS ToC is valid and segment table
> +	 * is initialized in memory and encryption status is set.
> +	 */
> +	if (minidump_ss->md_ss_smem_regions_baseptr == 0 ||
> +	    le32_to_cpu(minidump_ss->status) != 1 ||
> +	    le32_to_cpu(minidump_ss->enabled) != MD_SS_ENABLED ||
> +	    le32_to_cpu(minidump_ss->encryption_status) != MD_SS_ENCR_DONE) {
> +		dev_err(&rproc->dev, "Minidump not ready!! Aborting\n");

"Minidump not ready, skipping\n"

> +		return;
> +	}
> +
> +	adsp_add_minidump_segments(rproc, minidump_ss);
> +	rproc_minidump(rproc);
> +	adsp_minidump_cleanup(rproc);
> +}
> +
>  static int adsp_load(struct rproc *rproc, const struct firmware *fw)
>  {
>  	struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
> @@ -258,6 +343,15 @@ static const struct rproc_ops adsp_ops = {
>  	.panic = adsp_panic,
>  };
>  
> +static const struct rproc_ops adsp_minidump_ops = {
> +	.start = adsp_start,
> +	.stop = adsp_stop,
> +	.da_to_va = adsp_da_to_va,
> +	.load = adsp_load,
> +	.panic = adsp_panic,
> +	.coredump = adsp_dump,
> +};
> +
>  static int adsp_init_clock(struct qcom_adsp *adsp)
>  {
>  	int ret;
> @@ -398,8 +492,13 @@ static int adsp_probe(struct platform_device *pdev)
>  	if (ret < 0 && ret != -EINVAL)
>  		return ret;
>  
> -	rproc = rproc_alloc(&pdev->dev, pdev->name, &adsp_ops,
> -			    fw_name, sizeof(*adsp));
> +	if (desc->minidump_id)

Please use a local variable to select between adsp_minidump_ops and
adsp_ops, instead of making the whole rproc_alloc() call conditional.

Regards,
Bjorn

> +		rproc = rproc_alloc(&pdev->dev, pdev->name, &adsp_minidump_ops, fw_name,
> +				    sizeof(*adsp));
> +	else
> +		rproc = rproc_alloc(&pdev->dev, pdev->name, &adsp_ops, fw_name,
> +				    sizeof(*adsp));
> +
>  	if (!rproc) {
>  		dev_err(&pdev->dev, "unable to allocate remoteproc\n");
>  		return -ENOMEM;
> @@ -411,6 +510,7 @@ static int adsp_probe(struct platform_device *pdev)
>  	adsp = (struct qcom_adsp *)rproc->priv;
>  	adsp->dev = &pdev->dev;
>  	adsp->rproc = rproc;
> +	adsp->minidump_id = desc->minidump_id;
>  	adsp->pas_id = desc->pas_id;
>  	adsp->has_aggre2_clk = desc->has_aggre2_clk;
>  	adsp->info_name = desc->sysmon_name;
> -- 
> Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
> 



More information about the linux-arm-kernel mailing list