[PATCH V15 01/11] acpi: apei: read ack upon ghes record consumption

Baicar, Tyler tbaicar at codeaurora.org
Wed Apr 19 16:31:13 EDT 2017


On 4/19/2017 12:31 PM, Borislav Petkov wrote:
> On Tue, Apr 18, 2017 at 05:05:13PM -0600, Tyler Baicar wrote:
>> A RAS (Reliability, Availability, Serviceability) controller
>> may be a separate processor running in parallel with OS
>> execution, and may generate error records for consumption by
>> the OS. If the RAS controller produces multiple error records,
>> then they may be overwritten before the OS has consumed them.
>>
>> The Generic Hardware Error Source (GHES) v2 structure
>> introduces the capability for the OS to acknowledge the
>> consumption of the error record generated by the RAS
>> controller. A RAS controller supporting GHESv2 shall wait for
>> the acknowledgment before writing a new error record, thus
>> eliminating the race condition.
>>
>> Add support for parsing of GHESv2 sub-tables as well.
>>
>> Signed-off-by: Tyler Baicar <tbaicar at codeaurora.org>
>> CC: Jonathan (Zhixiong) Zhang <zjzhang at codeaurora.org>
>> Reviewed-by: James Morse <james.morse at arm.com>
>> ---
>>   drivers/acpi/apei/ghes.c | 55 +++++++++++++++++++++++++++++++++++++++++++++---
>>   drivers/acpi/apei/hest.c |  7 ++++--
>>   include/acpi/ghes.h      |  5 ++++-
>>   3 files changed, 61 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 79b3c9c..6d87ab7 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -46,6 +46,7 @@
>>   #include <linux/nmi.h>
>>   #include <linux/sched/clock.h>
>>   
>> +#include <acpi/actbl1.h>
>>   #include <acpi/ghes.h>
>>   #include <acpi/apei.h>
>>   #include <asm/tlbflush.h>
>> @@ -80,6 +81,10 @@
>>   	((struct acpi_hest_generic_status *)				\
>>   	 ((struct ghes_estatus_node *)(estatus_node) + 1))
>>   
>> +#define IS_HEST_TYPE_GENERIC_V2(ghes)				\
>> +	((struct acpi_hest_header *)ghes->generic)->type ==	\
> This is a nasty hack: casting the ghes->generic pointer to a pointer to its
> first member, which is an acpi_hest_header.
>
> Why isn't this a nice inline function with proper dereferencing:
>
> static inline bool is_hest_type_generic_v2(struct ghes *ghes)
> {
>          return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
> }
>
> ?
I'll change it to this.
> Also, please integrate scripts/checkpatch.pl in your patch creation
> workflow. Some of the warnings/errors *actually* make sense.
>
>>   /*
>>    * This driver isn't really modular, however for the time being,
>>    * continuing to use module_param is the easiest way to remain
>> @@ -240,6 +245,17 @@ static int ghes_estatus_pool_expand(unsigned long len)
>>   	return 0;
>>   }
>>   
>> +static int map_gen_v2(struct ghes *ghes)
>> +{
>> +	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
>> +}
>> +
>> +static void unmap_gen_v2(struct ghes *ghes)
>> +{
>> +	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
>> +	return;
>> +}
> Like this one, for example:
>
> WARNING: void function return statements are not generally useful
> #89: FILE: drivers/acpi/apei/ghes.c:257:
> +       return;
> +}
Will remove the return.
>
>> +
>>   static struct ghes *ghes_new(struct acpi_hest_generic *generic)
>>   {
>>   	struct ghes *ghes;
>> @@ -249,10 +265,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
>>   	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
>>   	if (!ghes)
>>   		return ERR_PTR(-ENOMEM);
>> +
>>   	ghes->generic = generic;
>> +	if (IS_HEST_TYPE_GENERIC_V2(ghes)) {
>> +		rc = map_gen_v2(ghes);
>> +		if (rc)
>> +			goto err_free;
>> +	}
>> +
>>   	rc = apei_map_generic_address(&generic->error_status_address);
>>   	if (rc)
>> -		goto err_free;
>> +		goto err_unmap_read_ack_addr;
>>   	error_block_length = generic->error_block_length;
>>   	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
>>   		pr_warning(FW_WARN GHES_PFX
>> @@ -264,13 +287,16 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
>>   	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
>>   	if (!ghes->estatus) {
>>   		rc = -ENOMEM;
>> -		goto err_unmap;
>> +		goto err_unmap_status_addr;
>>   	}
>>   
>>   	return ghes;
>>   
>> -err_unmap:
>> +err_unmap_status_addr:
>>   	apei_unmap_generic_address(&generic->error_status_address);
>> +err_unmap_read_ack_addr:
>> +	if (IS_HEST_TYPE_GENERIC_V2(ghes))
>> +		unmap_gen_v2(ghes);
>>   err_free:
>>   	kfree(ghes);
>>   	return ERR_PTR(rc);
>> @@ -280,6 +306,8 @@ static void ghes_fini(struct ghes *ghes)
>>   {
>>   	kfree(ghes->estatus);
>>   	apei_unmap_generic_address(&ghes->generic->error_status_address);
>> +	if (IS_HEST_TYPE_GENERIC_V2(ghes))
>> +		unmap_gen_v2(ghes);
>>   }
>>   
>>   static inline int ghes_severity(int severity)
>> @@ -649,6 +677,21 @@ static void ghes_estatus_cache_add(
>>   	rcu_read_unlock();
>>   }
>>   
>> +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2)
> If you rename this function parameter to something shorter, say gv2, for
> example...
Will do.
>
>> +{
>> +	int rc;
>> +	u64 val = 0;
>> +
>> +	rc = apei_read(&val, &generic_v2->read_ack_register);
>> +	if (rc)
>> +		return rc;
>> +
>> +	val &= generic_v2->read_ack_preserve << generic_v2->read_ack_register.bit_offset;
>> +	val |= generic_v2->read_ack_write << generic_v2->read_ack_register.bit_offset;
> ... you can align those two nicely while remaining within the 80 cols width:
>
>          val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
>          val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;
>
> and make them readable at a quick glance.
Will do.
>> +
>> +	return apei_write(val, &generic_v2->read_ack_register);
>> +}
>> +
>>   static int ghes_proc(struct ghes *ghes)
>>   {
>>   	int rc;
>> @@ -661,6 +704,12 @@ static int ghes_proc(struct ghes *ghes)
>>   			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
>>   	}
>>   	ghes_do_proc(ghes, ghes->estatus);
> This needs a comment explaining why v2 needs to ACK the error. The commit
> message is not necessarily something we'll find quickly in the future.
Will do.

Thanks,
Tyler

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.




More information about the linux-arm-kernel mailing list