[PATCH V2 2/9] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1

Suzuki K Poulose Suzuki.Poulose at arm.com
Thu Apr 14 03:22:46 PDT 2016


On 06/04/16 16:12, Tyler Baicar wrote:
> Currently when a RAS error is reported it is not timestamped.
> The ACPI 6.1 spec adds the timestamp field to the generic error
> data entry v3 structure. The timestamp of when the firmware
> generated the error is now being reported.
>
> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang at codeaurora.org>
> Signed-off-by: Richard Ruigrok <rruigrok at codeaurora.org>
> Signed-off-by: Tyler Baicar <tbaicar at codeaurora.org>
> Signed-off-by: Naveen Kaje <nkaje at codeaurora.org>
> ---
>   drivers/acpi/apei/ghes.c    |  35 ++++++++++++--
>   drivers/firmware/efi/cper.c | 111 +++++++++++++++++++++++++++++++++++++-------
>   2 files changed, 126 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 9b0543e..a6848706 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -419,7 +419,15 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
>   	int flags = -1;
>   	int sec_sev = ghes_severity(gdata->error_severity);
>   	struct cper_sec_mem_err *mem_err;
> -	mem_err = (struct cper_sec_mem_err *)(gdata + 1);
> +	struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
> +
> +	if ((gdata->revision >> 8) >= 0x03)

Could we please make that a macro? We seem to be using this check everywhere.

> +		gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
> +
> +	if (gdata_v3)
> +		mem_err = (struct cper_sec_mem_err *)(gdata_v3 + 1);
> +	else
> +		mem_err = (struct cper_sec_mem_err *)(gdata + 1);
>
>   	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
>   		return;
> @@ -449,14 +457,27 @@ static void ghes_do_proc(struct ghes *ghes,
>   {
>   	int sev, sec_sev;
>   	struct acpi_hest_generic_data *gdata;
> +	struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
> +	uuid_le sec_type;
>
>   	sev = ghes_severity(estatus->error_severity);
>   	apei_estatus_for_each_section(estatus, gdata) {
>   		sec_sev = ghes_severity(gdata->error_severity);
> -		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
> +		sec_type = *(uuid_le *)gdata->section_type;
> +
> +		if ((gdata->revision >> 8) >= 0x03)
> +			gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
> +
> +		if (!uuid_le_cmp(sec_type,
>   				 CPER_SEC_PLATFORM_MEM)) {
>   			struct cper_sec_mem_err *mem_err;
> -			mem_err = (struct cper_sec_mem_err *)(gdata+1);
> +
> +			if (gdata_v3)
> +				mem_err = (struct cper_sec_mem_err *)
> +					  (gdata_v3 + 1);
> +			else
> +				mem_err = (struct cper_sec_mem_err *)
> +					  (gdata + 1);
>   			ghes_edac_report_mem_error(ghes, sev, mem_err);
>
>   			arch_apei_report_mem_error(sev, mem_err);
> @@ -466,7 +487,13 @@ static void ghes_do_proc(struct ghes *ghes,
>   		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>   				      CPER_SEC_PCIE)) {
>   			struct cper_sec_pcie *pcie_err;
> -			pcie_err = (struct cper_sec_pcie *)(gdata+1);
> +
> +			if (gdata_v3)
> +				pcie_err = (struct cper_sec_pcie *)
> +					   (gdata_v3 + 1);
> +			else
> +				pcie_err = (struct cper_sec_pcie *)
> +					   (gdata + 1);
>   			if (sev == GHES_SEV_RECOVERABLE &&
>   			    sec_sev == GHES_SEV_RECOVERABLE &&
>   			    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index d425374..23f62962 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -32,6 +32,8 @@
>   #include <linux/acpi.h>
>   #include <linux/pci.h>
>   #include <linux/aer.h>
> +#include <linux/printk.h>
> +#include <linux/bcd.h>
>
>   #define INDENT_SP	" "
>
> @@ -392,6 +394,10 @@ static void cper_estatus_print_section(
>   	uuid_le *sec_type = (uuid_le *)gdata->section_type;
>   	__u16 severity;
>   	char newpfx[64];
> +	struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
> +
> +	if ((gdata->revision >> 8) >= 0x03)
> +		gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>
>   	severity = gdata->error_severity;
>   	printk("%s""Error %d, type: %s\n", pfx, sec_no,
> @@ -403,14 +409,24 @@ static void cper_estatus_print_section(
>
>   	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>   	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
> -		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
> +		struct cper_sec_proc_generic *proc_err;
> +
> +		if (gdata_v3)
> +			proc_err = (void *)(gdata_v3 + 1);
> +		else
> +			proc_err = (void *)(gdata + 1);
>   		printk("%s""section_type: general processor error\n", newpfx);
>   		if (gdata->error_data_length >= sizeof(*proc_err))
>   			cper_print_proc_generic(newpfx, proc_err);
>   		else
>   			goto err_section_too_small;
>   	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
> -		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
> +		struct cper_sec_mem_err *mem_err;
> +
> +		if (gdata_v3)
> +			mem_err = (void *)(gdata_v3 + 1);
> +		else
> +			mem_err = (void *)(gdata + 1);
>   		printk("%s""section_type: memory error\n", newpfx);
>   		if (gdata->error_data_length >=
>   		    sizeof(struct cper_sec_mem_err_old))
> @@ -419,7 +435,12 @@ static void cper_estatus_print_section(
>   		else
>   			goto err_section_too_small;
>   	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
> -		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
> +		struct cper_sec_pcie *pcie;
> +
> +		if (gdata_v3)
> +			pcie = (void *)(gdata_v3 + 1);
> +		else
> +			pcie = (void *)(gdata + 1);


In the cases above, the only use of gdata_v3 is to get the payload (or error_record).
So instead of spilling these checks all over, could we use something like:

/*
 * Version number of a generic error data entry: the entry's revision field
 * shifted right by 8 bits. The argument is parenthesized so that any pointer
 * expression (e.g. "p + 1" or "&entry") expands correctly.
 */
#define acpi_hest_generic_data_version(gdata) \
	((gdata)->revision >> 8)

/*
 * Return a pointer to the data that immediately follows the header of a
 * generic error data entry.
 *
 * Entries whose version is 3 or greater are treated as
 * struct acpi_hest_generic_data_v300, so the payload starts after that
 * structure; all other entries are treated as struct acpi_hest_generic_data.
 *
 * The two cases are returned separately rather than through a single
 * conditional expression: the operands would be pointers to two different
 * struct types, which is a pointer type mismatch in a conditional. Each
 * return statement converts to void * on its own.
 */
static inline void *
acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
{
	if (acpi_hest_generic_data_version(gdata) >= 3)
		return (struct acpi_hest_generic_data_v300 *)gdata + 1;

	return gdata + 1;
}

And then do:

	void *payload = acpi_hest_generic_data_payload(gdata);


>   		printk("%s""section_type: PCIe error\n", newpfx);
>   		if (gdata->error_data_length >= sizeof(*pcie))
>   			cper_print_pcie(newpfx, pcie, gdata);
> @@ -434,10 +455,38 @@ err_section_too_small:
>   	pr_err(FW_WARN "error section length is too small\n");
>   }
>
> +static void cper_estatus_print_section_v300(const char *pfx,
> +	const struct acpi_hest_generic_data_v300 *gdata, int sec_no)
> +{
> +	__u8 hour, min, sec, day, mon, year, century, *timestamp;
> +
> +	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
> +		timestamp = (__u8 *)&(gdata->time_stamp);
> +		memcpy(&sec, timestamp, 1);
> +		memcpy(&min, timestamp + 1, 1);
> +		memcpy(&hour, timestamp + 2, 1);
> +		memcpy(&day, timestamp + 4, 1);
> +		memcpy(&mon, timestamp + 5, 1);
> +		memcpy(&year, timestamp + 6, 1);
> +		memcpy(&century, timestamp + 7, 1);
> +		printk("%stime: ", pfx);
> +		printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
> +		printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
> +			bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
> +			bcd2bin(century), bcd2bin(year), bcd2bin(mon),
> +			bcd2bin(day));
> +	}
> +
> +	cper_estatus_print_section(pfx,
> +				   (const struct acpi_hest_generic_data *)gdata,
> +				   sec_no);
> +}


Wouldn't it be better to do the v3 header check from cper_estatus_print_section() and call out
to cper_estatus_print_section_v300()? That way, we can leave the callers unaffected,
even for future changes.


> +	if (gdata_v3) {
> +		while (data_len >= sizeof(*gdata_v3)) {
> +			gedata_len = gdata_v3->error_data_length;
> +			cper_estatus_print_section_v300(newpfx, gdata_v3,
> +							sec_no);
> +			data_len -= gedata_len + sizeof(*gdata_v3);
> +			gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
> +			sec_no++;
> +		}
> +	} else {
> +		while (data_len >= sizeof(*gdata)) {
> +			gedata_len = gdata->error_data_length;
> +			cper_estatus_print_section(newpfx, gdata, sec_no);
> +			data_len -= gedata_len + sizeof(*gdata);
> +			gdata = (void *)(gdata + 1) + gedata_len;
> +			sec_no++;
> +		}

With the change mentioned above and storing the sizeof(), we could make this
hunk a bit cleaner.


Suzuki



More information about the linux-arm-kernel mailing list