[PATCH V13 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section

Shiju Jose shiju.jose at huawei.com
Thu Mar 23 11:44:30 PDT 2017


Tested-by: Shiju Jose <shiju.jose at huawei.com>

> -----Original Message-----
> From: Tyler Baicar [mailto:tbaicar at codeaurora.org]
> Sent: 21 March 2017 22:47
> To: christoffer.dall at linaro.org; marc.zyngier at arm.com;
> pbonzini at redhat.com; rkrcmar at redhat.com; linux at armlinux.org.uk;
> catalin.marinas at arm.com; will.deacon at arm.com; rjw at rjwysocki.net;
> lenb at kernel.org; matt at codeblueprint.co.uk; robert.moore at intel.com;
> lv.zheng at intel.com; nkaje at codeaurora.org; zjzhang at codeaurora.org;
> mark.rutland at arm.com; james.morse at arm.com; akpm at linux-foundation.org;
> eun.taik.lee at samsung.com; sandeepa.s.prabhu at gmail.com;
> labbott at redhat.com; shijie.huang at arm.com; rruigrok at codeaurora.org;
> paul.gortmaker at windriver.com; tn at semihalf.com; fu.wei at linaro.org;
> rostedt at goodmis.org; bristot at redhat.com; linux-arm-
> kernel at lists.infradead.org; kvmarm at lists.cs.columbia.edu;
> kvm at vger.kernel.org; linux-kernel at vger.kernel.org; linux-
> acpi at vger.kernel.org; linux-efi at vger.kernel.org; devel at acpica.org;
> Suzuki.Poulose at arm.com; punit.agrawal at arm.com; astone at redhat.com;
> harba at codeaurora.org; hanjun.guo at linaro.org; John Garry; Shiju Jose;
> joe at perches.com
> Cc: Tyler Baicar
> Subject: [PATCH V13 08/10] ras: acpi / apei: generate trace event for
> unrecognized CPER section
> 
> The UEFI spec allows for non-standard sections in a Common Platform Error
> Record. This is defined in section N.2.3 of UEFI version 2.5.
> 
> Currently, if a CPER section's type (UUID) does not match any section
> type that the kernel knows how to parse, no trace event is generated
> for that section. As a result, the user cannot tell that such a hardware
> error occurred, nor see the error record of the non-standard section.
> 
> This commit generates a trace event which contains the raw error data
> for an unrecognized CPER section.
> 
> Signed-off-by: Tyler Baicar <tbaicar at codeaurora.org>
> CC: Jonathan (Zhixiong) Zhang <zjzhang at codeaurora.org>
> ---
>  drivers/acpi/apei/ghes.c | 24 ++++++++++++++++++++++--
>  drivers/ras/ras.c        |  1 +
>  include/ras/ras_event.h  | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 7e3e5e0..3ecbacc 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -45,11 +45,13 @@
>  #include <linux/aer.h>
>  #include <linux/nmi.h>
>  #include <linux/sched/clock.h>
> +#include <linux/uuid.h>
> 
>  #include <acpi/actbl1.h>
>  #include <acpi/ghes.h>
>  #include <acpi/apei.h>
>  #include <asm/tlbflush.h>
> +#include <ras/ras_event.h>
> 
>  #include "apei-internal.h"
> 
> @@ -454,11 +456,21 @@ static void ghes_do_proc(struct ghes *ghes,  {
>  	int sev, sec_sev;
>  	struct acpi_hest_generic_data *gdata;
> +	uuid_le sec_type;
> +	uuid_le *fru_id = &NULL_UUID_LE;
> +	char *fru_text = "";
> 
>  	sev = ghes_severity(estatus->error_severity);
>  	apei_estatus_for_each_section(estatus, gdata) {
>  		sec_sev = ghes_severity(gdata->error_severity);
> -		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
> +		sec_type = *(uuid_le *)gdata->section_type;
> +
> +		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> +			fru_id = (uuid_le *)gdata->fru_id;
> +		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> +			fru_text = gdata->fru_text;
> +
> +		if (!uuid_le_cmp(sec_type,
>  				 CPER_SEC_PLATFORM_MEM)) {
>  			struct cper_sec_mem_err *mem_err;
> 
> @@ -469,7 +481,7 @@ static void ghes_do_proc(struct ghes *ghes,
>  			ghes_handle_memory_failure(gdata, sev);
>  		}
>  #ifdef CONFIG_ACPI_APEI_PCIEAER
> -		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
> +		else if (!uuid_le_cmp(sec_type,
>  				      CPER_SEC_PCIE)) {
>  			struct cper_sec_pcie *pcie_err;
> 
> @@ -502,6 +514,14 @@ static void ghes_do_proc(struct ghes *ghes,
> 
>  		}
>  #endif
> +#ifdef CONFIG_RAS
> +		else if (trace_unknown_sec_event_enabled()) {
> +			void *unknown_err = acpi_hest_generic_data_payload(gdata);
> +			trace_unknown_sec_event(&sec_type,
> +					fru_id, fru_text, sec_sev,
> +					unknown_err, gdata->error_data_length);
> +		}
> +#endif
>  	}
>  }
> 
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index b67dd36..fb2500b 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -27,3 +27,4 @@ static int __init ras_init(void)
> EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
>  #endif
>  EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index 1791a12..5861b6f 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -162,6 +162,51 @@
>  );
> 
>  /*
> + * Unknown Section Report
> + *
> + * This event is generated when hardware detected a hardware
> + * error event, which may be of non-standard section as defined
> + * in UEFI spec appendix "Common Platform Error Record", or may
> + * be of sections for which TRACE_EVENT is not defined.
> + *
> + */
> +TRACE_EVENT(unknown_sec_event,
> +
> +	TP_PROTO(const uuid_le *sec_type,
> +		 const uuid_le *fru_id,
> +		 const char *fru_text,
> +		 const u8 sev,
> +		 const u8 *err,
> +		 const u32 len),
> +
> +	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
> +
> +	TP_STRUCT__entry(
> +		__array(char, sec_type, 16)
> +		__array(char, fru_id, 16)
> +		__string(fru_text, fru_text)
> +		__field(u8, sev)
> +		__field(u32, len)
> +		__dynamic_array(u8, buf, len)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
> +		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
> +		__assign_str(fru_text, fru_text);
> +		__entry->sev = sev;
> +		__entry->len = len;
> +		memcpy(__get_dynamic_array(buf), err, len);
> +	),
> +
> +	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
> +		  __entry->sev, __entry->sec_type,
> +		  __entry->fru_id, __get_str(fru_text),
> +		  __entry->len,
> +		  __print_hex(__get_dynamic_array(buf), __entry->len)) );
> +
> +/*
>   * PCIe AER Trace event
>   *
>   * These events are generated when hardware detects a corrected or
> --
> Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
> Technologies, Inc.
> Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a
> Linux Foundation Collaborative Project.




More information about the linux-arm-kernel mailing list