[Qemu-devel] [PATCH v3 1/4] ACPI: Add APEI GHES Table Generation support
gengdongjiu
gengdongjiu at huawei.com
Fri May 19 22:35:27 PDT 2017
Laszlo,
sorry for the late response.
On 2017/5/13 5:00, Laszlo Ersek wrote:
> On 04/30/17 07:35, Dongjiu Geng wrote:
>> This implements APEI GHES Table by passing the error cper info
>> to the guest via a fw_cfg_blob. After a CPER info is added, an
>> SEA/SEI exception will be injected into the guest OS.
>>
>> Below is the table layout, the max number of error soure is 11,
>> which is classified by notification type.
>>
>> etc/acpi/tables etc/hardware_errors
>> ================ ==========================================
>> +-----------+
>> +--------------+ | address | +-> +--------------+
>> | HEST + | registers | | | Error Status |
>> + +------------+ | +---------+ | | Data Block 1 |
>> | | GHES1 | --> | |address1 | --------+ | +------------+
>> | | GHES2 | --> | |address2 | ------+ | | CPER |
>> | | GHES3 | --> | |address3 | ----+ | | | CPER |
>> | | .... | --> | | ....... | | | | | CPER |
>> | | GHES10 | --> | |address10| -+ | | | | CPER |
>> +-+------------+ +-+---------+ | | | +-+------------+
>> | | |
>> | | +---> +--------------+
>> | | | Error Status |
>> | | | Data Block 2 |
>> | | | +------------+
>> | | | | CPER |
>> | | | | CPER |
>> | | +-+------------+
>> | |
>> | +-----> +--------------+
>> | | Error Status |
>> | | Data Block 3 |
>> | | +------------+
>> | | | CPER |
>> | +-+------------+
>> | ...........
>> +--------> +--------------+
>> | Error Status |
>> | Data Block 10|
>> | +------------+
>> | | CPER |
>> | | CPER |
>> | | CPER |
>> +-+------------+
>>
>> Signed-off-by: Dongjiu Geng <gengdongjiu at huawei.com>
>> ---
>> default-configs/arm-softmmu.mak | 1 +
>> hw/acpi/Makefile.objs | 1 +
>> hw/acpi/aml-build.c | 2 +
>> hw/acpi/hest_ghes.c | 203 +++++++++++++++++++++++++++++++++++
>> hw/arm/virt-acpi-build.c | 6 ++
>> include/hw/acpi/acpi-defs.h | 227 ++++++++++++++++++++++++++++++++++++++++
>> include/hw/acpi/aml-build.h | 1 +
>> include/hw/acpi/hest_ghes.h | 43 ++++++++
>> 8 files changed, 484 insertions(+)
>> create mode 100644 hw/acpi/hest_ghes.c
>> create mode 100644 include/hw/acpi/hest_ghes.h
>
> Disclaimer: I'm not an ACPI (or any kind of) QEMU maintainer, so I can
> only share my personal opinion.
I know it, I appreciated that you can find your free time to review it.
>
> (1) This patch is too big. It should be split in two parts at least.
>
> The first patch should contain the new ACPI structures and macros. The
> second patch should contain the generation feature.
OK, have splited it.
>
> I'll reorder the diff in my response.
thanks.
>
>> diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
>> index 4cc3630..27adede 100644
>> --- a/include/hw/acpi/acpi-defs.h
>> +++ b/include/hw/acpi/acpi-defs.h
>> @@ -295,6 +295,58 @@ typedef struct AcpiMultipleApicTable AcpiMultipleApicTable;
>> #define ACPI_APIC_GENERIC_TRANSLATOR 15
>> #define ACPI_APIC_RESERVED 16 /* 16 and greater are reserved */
>>
>
> (2) Please add a comment above the following macros: they come from the
> UEFI Spec 2.6, "N.2.5 Memory Error Section".
good suggestion.
>
>> +#define CPER_MEM_VALID_ERROR_STATUS 0x0001
>> +#define CPER_MEM_VALID_PA 0x0002
>> +#define CPER_MEM_VALID_PA_MASK 0x0004
>> +#define CPER_MEM_VALID_NODE 0x0008
>> +#define CPER_MEM_VALID_CARD 0x0010
>> +#define CPER_MEM_VALID_MODULE 0x0020
>> +#define CPER_MEM_VALID_BANK 0x0040
>> +#define CPER_MEM_VALID_DEVICE 0x0080
>> +#define CPER_MEM_VALID_ROW 0x0100
>> +#define CPER_MEM_VALID_COLUMN 0x0200
>> +#define CPER_MEM_VALID_BIT_POSITION 0x0400
>> +#define CPER_MEM_VALID_REQUESTOR_ID 0x0800
>> +#define CPER_MEM_VALID_RESPONDER_ID 0x1000
>> +#define CPER_MEM_VALID_TARGET_ID 0x2000
>
> (3) _ID should be dropped.
OK.
I copied these macros from kernel code "include/linux/cper.h"
I have planed to remove the unused macros
>
>> +#define CPER_MEM_VALID_ERROR_TYPE 0x4000
>> +#define CPER_MEM_VALID_RANK_NUMBER 0x8000
>> +#define CPER_MEM_VALID_CARD_HANDLE 0x10000
>> +#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
>
> (4) I think if you are padding the first 16 macros with zeroes on the
> left, then they should be padded to five nibbles, given that you have 18
> macros.
>
> (5) Please prefix all of the macro names with "UEFI_".
>
>> +
>> +typedef struct {
>> + uint8_t b[16];
>> +} uuid_le;
>> +
>> +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
>> +((uuid_le) \
>> +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
>> + (b) & 0xff, ((b) >> 8) & 0xff, \
>> + (c) & 0xff, ((c) >> 8) & 0xff, \
>> + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } })
>
> (6) This shouldn't be necessary -- or at least not here. We already have
> "include/qemu/uuid.h".
>
> If you need this macro, then in my opinion it should be moved to
> "include/qemu/uuid.h" (in a separate patch), and the macro should
> produce a compound literal of the QemuUUID structure type.
>
> And, as documented for QemuUUID, it should be in big endian byte order.
> For little-endian use, it should be byte-swapped with qemu_uuid_bswap().
I checked the struction definition in the "include/qemu/uuid.h", the QemuUUID may different
with this definition,here UUID is for the CPER section UUID. I see the spec all define them
to little-endian.
Platform Memory
•{0xA5BC1114, 0x6F64, 0x4EDE, {0xB8, 0x63, 0x3E, 0x83, 0xED, 0x7C, 0x83, 0xB1}}
PCIe}}
•{0xD995E954, 0xBBC1, 0x430F, {0xAD, 0x91, 0xB4, 0x4D, 0xCB, 0x3C, 0x6F, 0x35}}
Firmware Error Record Reference
•{0x81212A96, 0x09ED, 0x4996, {0x94, 0x71, 0x8D, 0x72, 0x9C, 0x8E, 0x69, 0xED}}
PCI/PCI-X Bus
•{0xC5753963, 0x3B84, 0x4095, {0xBF, 0x78, 0xED, 0xDA, 0xD3, 0xF9, 0xC9, 0xDD}}
PCI Component/Device
•{0xEB5E4685, 0xCA66, 0x4769, {0xB6, 0xA2, 0x26, 0x06, 0x8B, 0x00, 0x13, 0x26}}
DMAr Generic
•{0x5B51FEF7, 0xC79D, 0x4434, {0x8F, 0x1B, 0xAA,
•0x62, 0xDE, 0x3E, 0x2C, 0x64}}
>
>> +
>> +/* Platform Memory */
>> +#define CPER_SEC_PLATFORM_MEM \
>> + UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
>> + 0xED, 0x7C, 0x83, 0xB1)
>
> (7) Please add a comment this is from UEFI 2.6 "N.2.2 Section
> Descriptor".
>
> (8) Please prefix the macro with UEFI_.
>
>> +
>> +/* Values for Notify Type field above */
>> +
>> +enum acpi_hest_notify_types {
>> + ACPI_HEST_NOTIFY_POLLED = 0,
>> + ACPI_HEST_NOTIFY_EXTERNAL = 1,
>> + ACPI_HEST_NOTIFY_LOCAL = 2,
>> + ACPI_HEST_NOTIFY_SCI = 3,
>> + ACPI_HEST_NOTIFY_NMI = 4,
>> + ACPI_HEST_NOTIFY_CMCI = 5, /* ACPI 5.0 */
>> + ACPI_HEST_NOTIFY_MCE = 6, /* ACPI 5.0 */
>> + ACPI_HEST_NOTIFY_GPIO = 7, /* ACPI 6.0 */
>> + ACPI_HEST_NOTIFY_SEA = 8, /* ACPI 6.1 */
>> + ACPI_HEST_NOTIFY_SEI = 9, /* ACPI 6.1 */
>> + ACPI_HEST_NOTIFY_GSIV = 10, /* ACPI 6.1 */
>> + ACPI_HEST_NOTIFY_RESERVED = 11 /* 11 and greater are reserved */
>> +};
>> +
>
> (9) Please add a comment that this is from the ACPI 6.1 spec, "18.3.2.9
> Hardware Error Notification".
OK.
>
> (10) For better or worse, type names and struct tags in this header file
> use CamelCase, and generally start with the prefix Acpi. So I think the
> above should be called "AcpiHestNotifyType" (singular).
good suggestion.
>
> The enum constants look good.
>
>> /*
>> * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
>> */
>> @@ -475,6 +527,181 @@ struct AcpiSystemResourceAffinityTable
>> } QEMU_PACKED;
>> typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable;
>>
>> +#define ACPI_ADR_SPACE_SYSTEM_MEMORY (uint8_t) 0
>> +#define ACPI_ADR_SPACE_SYSTEM_IO (uint8_t) 1
>> +#define ACPI_ADR_SPACE_PCI_CONFIG (uint8_t) 2
>> +#define ACPI_ADR_SPACE_EC (uint8_t) 3
>> +#define ACPI_ADR_SPACE_SMBUS (uint8_t) 4
>> +#define ACPI_ADR_SPACE_CMOS (uint8_t) 5
>> +#define ACPI_ADR_SPACE_PCI_BAR_TARGET (uint8_t) 6
>> +#define ACPI_ADR_SPACE_IPMI (uint8_t) 7
>> +#define ACPI_ADR_SPACE_GPIO (uint8_t) 8
>> +#define ACPI_ADR_SPACE_GSBUS (uint8_t) 9
>> +#define ACPI_ADR_SPACE_PLATFORM_COMM (uint8_t) 10
>
> (11) These macros are not necessary. Instead, please extend the
> AmlRegionSpace enum type in "include/hw/acpi/aml-build.h".
OK.
>
> (12) Additionally, where do the values 5 through 9 come from? ACPI 6.1
> "5.2.3.2 Generic Address Structure" leaves them reserved.
good point. the values 5 through 9 from the kernel code: include/acpi/actypes.h
#define ACPI_ADR_SPACE_SYSTEM_MEMORY (acpi_adr_space_type) 0
#define ACPI_ADR_SPACE_SYSTEM_IO (acpi_adr_space_type) 1
#define ACPI_ADR_SPACE_PCI_CONFIG (acpi_adr_space_type) 2
#define ACPI_ADR_SPACE_EC (acpi_adr_space_type) 3
#define ACPI_ADR_SPACE_SMBUS (acpi_adr_space_type) 4
#define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5
#define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6
#define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7
#define ACPI_ADR_SPACE_GPIO (acpi_adr_space_type) 8
#define ACPI_ADR_SPACE_GSBUS (acpi_adr_space_type) 9
#define ACPI_ADR_SPACE_PLATFORM_COMM (acpi_adr_space_type) 10
I planned remove the values 5 through 9.
>
>> +
>> +/* GAS - Generic Address Structure */
>> +struct acpi_generic_address {
>> + uint8_t space_id; /* Address space where
>> + *struct or register exists
>> + */
>> + uint8_t bit_width; /* Size in bits of given register */
>> + uint8_t bit_offset; /* Bit offset within the register */
>> + uint8_t access_width; /* Minimum Access size (ACPI 3.0) */
>> + uint64_t address; /* 64-bit address of struct or register */
>> +} __attribute__ ((packed));
>> +
>
> (13) This structure is already defined, see AcpiGenericAddress.
>
>> +/* Hardware Error Notification */
>> +struct acpi_hest_notify {
>> + uint8_t type;
>> + uint8_t length;
>> + uint16_t config_write_enable;
>> + uint32_t poll_interval;
>> + uint32_t vector;
>> + uint32_t polling_threshold_value;
>> + uint32_t polling_threshold_window;
>> + uint32_t error_threshold_value;
>> + uint32_t error_threshold_window;
>> +};
>
> (14) Please add a comment that this is from the ACPI 6.1 spec, "18.3.2.9
> Hardware Error Notification".
>
> (15) The structure should be called AcpiHestNotify. Please also add a
> direct typedef for it, similarly to the other struct AcpiXxxx types seen
> in this header.
>
> (16) To the "type" field, please append a comment that the values come
> from AcpiHestNotifyType.
ok.
>
> (17) This structure should be packed. Please add QEMU_PACKED between the
> closing brace and the semicolon.
OK, have modified it.
>
>> +
>> +enum acpi_hest_types {
>> + ACPI_HEST_TYPE_IA32_CHECK = 0,
>> + ACPI_HEST_TYPE_IA32_CORRECTED_CHECK = 1,
>> + ACPI_HEST_TYPE_IA32_NMI = 2,
>> + ACPI_HEST_TYPE_NOT_USED3 = 3,
>> + ACPI_HEST_TYPE_NOT_USED4 = 4,
>> + ACPI_HEST_TYPE_NOT_USED5 = 5,
>> + ACPI_HEST_TYPE_AER_ROOT_PORT = 6,
>> + ACPI_HEST_TYPE_AER_ENDPOINT = 7,
>> + ACPI_HEST_TYPE_AER_BRIDGE = 8,
>> + ACPI_HEST_TYPE_GENERIC_ERROR = 9,
>> + ACPI_HEST_TYPE_GENERIC_ERROR_V2 = 10,
>> + ACPI_HEST_TYPE_RESERVED = 11 /* 11 and greater are reserved */
>> +};
>
> (18) Please add a comment that these are from ACPI 6.1, sections
> "18.3.2.1 IA-32 Architecture Machine Check Exception" through "18.3.2.8
> Generic Hardware Error Source version 2".
>
> (19) The type name should be "AcpiHestSourceType" (singular).
>
> (20) I think the enum constants should be renamed to
> ACPI_HEST_SOURCE_xxx, from ACPI_HEST_TYPE_xxx.
>
> (21) I think the NOT_USED{3,4,5} enum constants should be removed.
OK.
>
>> +
>> +/* Values for block_status flags above */
>
> (22) Here I think we should only say, 'Block Status bitmasks from ACPI
> 6.1, "18.3.2.7.1 Generic Error Data"'. The block_status field that you
> refer to is not above, it comes later.
>
>> +#define ACPI_BERT_UNCORRECTABLE (1)
>> +#define ACPI_BERT_CORRECTABLE (1 << 1)
>> +#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (1 << 2)
>> +#define ACPI_BERT_MULTIPLE_CORRECTABLE (1 << 3)
>> +/* 8 bits, error count */
>> +#define ACPI_BERT_ERROR_ENTRY_COUNT (0xFF << 4)
>>
>
> (23) Any particular reason to call these BERT? The "Boot Error Record
> Table" is specified in "18.3.1 Boot Error Source", but the block status
> bitmasks don't look related.
>
> To me ACPI_GEBS_xxx ("generic error block status") seems like a more
> fitting prefix.
good point, it indeed confused other people with such "ACPI_BERT_xxxx" prefix.
>
> +
>> +/* Generic Hardware Error Source Structure */
>> +struct AcpiGenericHardwareErrorSource {
>> + uint16_t type;
>> + uint16_t source_id;
>> + uint16_t related_source_id;
>> + uint8_t flags;
>> + uint8_t enabled;
>> + uint32_t number_of_records;
>> + uint32_t max_sections_per_record;
>> + uint32_t max_raw_data_length;
>> + struct acpi_generic_address error_status_address;
>> + struct acpi_hest_notify notify;
>> + uint32_t error_status_block_length;
>> +} QEMU_PACKED;
>> +typedef struct AcpiGenericHardwareErrorSource AcpiGenericHardwareErrorSource;
>
> (24) This looks good to me in general.
>
> I suggest adding a reference to ACPI 6.1 "18.3.2.7 Generic Hardware
> Error Source". Also, I think we should mention that "type" has to be
> ACPI_HEST_SOURCE_GENERIC_ERROR.
Ok.
>
>> +
>> +/* Generic Hardware Error Source , version 2 */
>> +struct AcpiGenericHardwareErrorSourceV2 {
>> + uint16_t type;
>> + uint16_t source_id;
>> + uint16_t related_source_id;
>> + uint8_t flags;
>> + uint8_t enabled;
>> + uint32_t number_of_records;
>> + uint32_t max_sections_per_record;
>> + uint32_t max_raw_data_length;
>> + struct acpi_generic_address error_status_address;
>> + struct acpi_hest_notify notify;
>> + uint32_t error_status_block_length;
>> + struct acpi_generic_address read_ack_register;
>> + uint64_t read_ack_preserve;
>> + uint64_t read_ack_write;
>> +} QEMU_PACKED;
>> +typedef struct AcpiGenericHardwareErrorSourceV2
>> + AcpiGenericHardwareErrorSourceV2;
>
> (25) Same comments; I suggest adding a reference to "18.3.2.8 Generic
> Hardware Error Source version 2", and mentioning
> ACPI_HEST_SOURCE_GENERIC_ERROR_V2 for the "type" field.
>
>> +
>> +/* Generic Error Status block */
>> +
>> +struct AcpiGenericErrorStatus {
>> + uint32_t block_status;
>> + uint32_t raw_data_offset;
>> + uint32_t raw_data_length;
>> + uint32_t data_length;
>> + uint32_t error_severity;
>> +};
>> +typedef struct AcpiGenericErrorStatus AcpiGenericErrorStatus;
>> +
>
> (26) Please mention that this is from ACPI 6.1, "18.3.2.7.1 Generic
> Error Data".
>
> (27) Near the block_status field, we should point out that it is a
> bitmask composed of ACPI_GEBS_xxx macros.
>
> (28) QEMU_PACKED is missing. (It will make no difference in practice,
> but I recommend it for consistency and documentation purposes.)
OK. will do it.
>
>> +/* Generic Error Data entry */
>> +
>> +struct AcpiGenericErrorData {
>> + uint8_t section_type[16];
>> + uint32_t error_severity;
>> + uint16_t revision;
>> + uint8_t validation_bits;
>> + uint8_t flags;
>> + uint32_t error_data_length;
>> + uint8_t fru_id[16];
>> + uint8_t fru_text[20];
>> +};
>> +typedef struct AcpiGenericErrorData AcpiGenericErrorData;
>
> (29) Please point to ACPI 6.1, "18.3.2.7.1 Generic Error Data" again, in
> the leading comment.
>
> (30) QEMU_PACKED is missing.
>
> (31) I think we should use the QemuUUID type for the "section_type"
> field. And, in order to make it clear that it has little endian
> encoding, let's call it "section_type_le".
>
> An added benefit is that in the code, the field can be set with a simple
> structure assignment from UEFI_CPER_SEC_PLATFORM_MEM (and then can be
> byte-swapped in place, for little endiannes, with qemu_uuid_bswap()).
good suggestion, will follow that.
>
>> +
>> +/* Extension for revision 0x0300 */
>> +struct AcpiGenericErrorDataV300 {
>> + uint8_t section_type[16];
>> + uint32_t error_severity;
>> + uint16_t revision;
>> + uint8_t validation_bits;
>> + uint8_t flags;
>> + uint32_t error_data_length;
>> + uint8_t fru_id[16];
>> + uint8_t fru_text[20];
>> + uint64_t time_stamp;
>> +};
>> +typedef struct AcpiGenericErrorDataV300 AcpiGenericErrorDataV300;
>> +
>
> (32) Same comments as (29), (30), (31) above.
>
> (33) Actually, do we need both AcpiGenericErrorData and
> AcpiGenericErrorDataV300? In the code we seem to be using only the
> former. On the other hand, in the spec I can see only the latter. Where
> does AcpiGenericErrorData come from?
>
The AcpiGenericErrorData come from "ACPI_5.0A", link is here: http://www.acpi.info/spec50a.htm
The revision number of the error data is 0x0201 for "AcpiGenericErrorData"
The revision number of the error data is 0x0300 for "AcpiGenericErrorDataV300"
>> +enum {
>> + CPER_SEV_RECOVERABLE,
>> + CPER_SEV_FATAL,
>> + CPER_SEV_CORRECTED,
>> + CPER_SEV_INFORMATIONAL,
>> +};
>
> (34) I suggest giving a name to this type, for example
> AcpiGenericErrorSeverity.
>
> (35) The enumeration constants should start with ACPI_.
OK.
>
> (36) I suggest moving this type above AcpiGenericErrorData and
> AcpiGenericErrorDataV300, and remarking on the "error_severity" fields
> that they take their values from AcpiGenericErrorSeverity.
>
> (37) Where does "INFORMATIONAL" come from? In ACPI 6.1, "18.3.2.7.1
> Generic Error Data", I see "None" for value 3.
>
>> +
>> +/* Memory Error Section */
>> +struct cper_sec_mem_err {
>> + uint64_t validation_bits;
>> + uint64_t error_status;
>> + uint64_t physical_addr;
>> + uint64_t physical_addr_mask;
>> + uint16_t node;
>> + uint16_t card;
>> + uint16_t module;
>> + uint16_t bank;
>> + uint16_t device;
>> + uint16_t row;
>> + uint16_t column;
>> + uint16_t bit_pos;
>> + uint64_t requestor_id;
>> + uint64_t responder_id;
>> + uint64_t target_id;
>> + uint8_t error_type;
>> + uint8_t reserved;
>> + uint16_t rank;
>> + uint16_t mem_array_handle; /* card handle in UEFI 2.4 */
>> + uint16_t mem_dev_handle; /* module handle in UEFI 2.4 */
>> +};
>> + typedef struct cper_sec_mem_err cper_sec_mem_err;
>
> (38) Please add a comment that is is from UEFI 2.6, "N.2.5 Memory Error
> Section".
>
> (39) The structure and the typedef should be called "UefiCperSecMemErr".
>
> (40) I suggest adding a comment to "validation_bits" that it is a
> bitmask composed of CPER_MEM_VALID_xxx macros.
>
> (41) QEMU_PACKED is missing.
will modify it.
>
>> +
>> +/*
>> + * HEST Description Table
>> + */
>> +struct AcpiHardwareErrorSourceTable {
>> + ACPI_TABLE_HEADER_DEF /* ACPI common table header */
>> + uint32_t error_source_count;
>> +} QEMU_PACKED;
>> +typedef struct AcpiHardwareErrorSourceTable AcpiHardwareErrorSourceTable;
>> +
>> #define ACPI_SRAT_PROCESSOR_APIC 0
>> #define ACPI_SRAT_MEMORY 1
>> #define ACPI_SRAT_PROCESSOR_x2APIC 2
>
> Next file:
>
>> diff --git a/include/hw/acpi/hest_ghes.h b/include/hw/acpi/hest_ghes.h
>> new file mode 100644
>> index 0000000..0cadc2b
>> --- /dev/null
>> +++ b/include/hw/acpi/hest_ghes.h
>> @@ -0,0 +1,43 @@
>> +#ifndef ACPI_GHES_H
>> +#define ACPI_GHES_H
>> +
>> +#include "hw/acpi/bios-linker-loader.h"
>> +
>> +#define GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
>> +#define GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
>> +
>> +#define GAS_ADDRESS_OFFSET 4
>> +#define ERROR_STATUS_ADDRESS_OFFSET 20
>> +#define NOTIFICATION_STRUCTURE 32
>> +
>> +#define BFAPEI_OK 0
>> +#define BFAPEI_FAIL 1
>> +
>> +/* The max number of error source, the error sources
>> + * are classified by notification type, below is the definition
>> + * 0 - Polled
>> + * 1 - External Interrupt
>> + * 2 - Local Interrupt
>> + * 3 - SCI
>> + * 4 - NMI
>> + * 5 - CMCI
>> + * 6 - MCE
>> + * 7 - GPIO-Signal
>> + * 8 - ARMv8 SEA
>> + * 9 - ARMv8 SEI
>> + * 10 - External Interrupt - GSIV
>> + */
>> +#define MAX_ERROR_SOURCE_COUNT_V6 11
>
> I'll have to review this header file more thoroughly, once I see the
> code that references these macros. For now, I have one comment:
>
> (42) I think the notification type list should be removed from this
> location. Also, the open-coded value 11 should be replaced with
> the ACPI_HEST_NOTIFY_RESERVED enumeration constant.
agree with you. in your suggested way, it is easy to extend.
>
> I will try to continue reviewing this patch sometime next week (second
> half of the week at the earliest, I think).
Ok, it is not hurry, you can find your free time to review it
>
> Please feel free to disagree with my comments; I prefer to write down
> everything that crosses my mind. It's encouraged to raise
> counter-arguments.
many thanks for your detailed suggestion.
>
> Thanks
> Laszlo
>
>> +/* The max size in Bytes for one error block */
>> +#define MAX_RAW_DATA_LENGTH 0x1000
>> +
>> +typedef struct GhesErrorState {
>> + uint64_t physical_addr;
>> + uint64_t ghes_addr_le[8];
>> +} GhesErrorState;
>> +
>> +void ghes_build_acpi(GArray *table_data, GArray *hardware_error,
>> + BIOSLinker *linker);
>> +void ghes_add_fw_cfg(FWCfgState *s, GArray *guid);
>> +void ghes_update_guest(uint32_t notify, uint64_t physical_address);
>> +#endif
>>
>
>> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
>> index 0835e59..e7ab5dc 100644
>> --- a/hw/arm/virt-acpi-build.c
>> +++ b/hw/arm/virt-acpi-build.c
>> @@ -45,6 +45,8 @@
>> #include "hw/arm/virt.h"
>> #include "sysemu/numa.h"
>> #include "kvm_arm.h"
>> +#include "hw/acpi/vmgenid.h"
>> +#include "hw/acpi/hest_ghes.h"
>>
>> #define ARM_SPI_BASE 32
>> #define ACPI_POWER_BUTTON_DEVICE "PWRB"
>> @@ -778,6 +780,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
>> acpi_add_table(table_offsets, tables_blob);
>> build_spcr(tables_blob, tables->linker, vms);
>>
>> + acpi_add_table(table_offsets, tables_blob);
>> + ghes_build_acpi(tables_blob, tables->hardware_errors, tables->linker);
>> +
>> if (nb_numa_nodes > 0) {
>> acpi_add_table(table_offsets, tables_blob);
>> build_srat(tables_blob, tables->linker, vms);
>> @@ -892,6 +897,7 @@ void virt_acpi_setup(VirtMachineState *vms)
>>
>> build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp,
>> ACPI_BUILD_RSDP_FILE, 0);
>> + ghes_add_fw_cfg(vms->fw_cfg, tables.hardware_errors);
>>
>> qemu_register_reset(virt_acpi_build_reset, build_state);
>> virt_acpi_build_reset(build_state);
>
>> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
>> index 1e3bd2b..d5f1552 100644
>> --- a/default-configs/arm-softmmu.mak
>> +++ b/default-configs/arm-softmmu.mak
>> @@ -121,3 +121,4 @@ CONFIG_ACPI=y
>> CONFIG_SMBIOS=y
>> CONFIG_ASPEED_SOC=y
>> CONFIG_GPIO_KEY=y
>> +CONFIG_ACPI_APEI_GENERATION=y
>
>> diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
>> index 11c35bc..776b46e 100644
>> --- a/hw/acpi/Makefile.objs
>> +++ b/hw/acpi/Makefile.objs
>> @@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
>> common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
>> common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
>> common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
>> +common-obj-$(CONFIG_ACPI_APEI_GENERATION) += hest_ghes.o
>> common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
>>
>> common-obj-y += acpi_interface.o
>
>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>> index c6f2032..802b98d 100644
>> --- a/hw/acpi/aml-build.c
>> +++ b/hw/acpi/aml-build.c
>> @@ -1560,6 +1560,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
>> tables->table_data = g_array_new(false, true /* clear */, 1);
>> tables->tcpalog = g_array_new(false, true /* clear */, 1);
>> tables->vmgenid = g_array_new(false, true /* clear */, 1);
>> + tables->hardware_errors = g_array_new(false, true /* clear */, 1);
>> tables->linker = bios_linker_loader_init();
>> }
>>
>> @@ -1570,6 +1571,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
>> g_array_free(tables->table_data, true);
>> g_array_free(tables->tcpalog, mfre);
>> g_array_free(tables->vmgenid, mfre);
>> + g_array_free(tables->hardware_errors, mfre);
>> }
>>
>> /* Build rsdt table */
>
>> diff --git a/hw/acpi/hest_ghes.c b/hw/acpi/hest_ghes.c
>> new file mode 100644
>> index 0000000..91d382e
>> --- /dev/null
>> +++ b/hw/acpi/hest_ghes.c
>> @@ -0,0 +1,203 @@
>> +/*
>> + * APEI GHES table Generation
>> + *
>> + * Copyright (C) 2017 huawei.
>> + *
>> + * Author: Dongjiu Geng <gengdongjiu at huawei.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qmp-commands.h"
>> +#include "hw/acpi/acpi.h"
>> +#include "hw/acpi/aml-build.h"
>> +#include "hw/acpi/hest_ghes.h"
>> +#include "hw/nvram/fw_cfg.h"
>> +#include "sysemu/sysemu.h"
>> +
>> +static int ghes_generate_cper_record(uint64_t block_error_address,
>> + uint64_t error_physical_addr)
>> +{
>> + AcpiGenericErrorStatus block;
>> + AcpiGenericErrorData *gdata;
>> + struct cper_sec_mem_err *mem_err;
>> + uint64_t block_data_length;
>> + unsigned char *buffer;
>> +
>> + cpu_physical_memory_read(block_error_address, &block,
>> + sizeof(AcpiGenericErrorStatus));
>> +
>> + block_data_length = sizeof(AcpiGenericErrorStatus) + block.data_length;
>> +
>> + /* If the Generic Error Status Block is NULL, update
>> + * the block header
>> + */
>> + if (!block.block_status) {
>> + block.block_status = ACPI_BERT_UNCORRECTABLE;
>> + block.error_severity = CPER_SEV_FATAL;
>> + }
>> +
>> + block.data_length += sizeof(AcpiGenericErrorData);
>> + block.data_length += sizeof(struct cper_sec_mem_err);
>> +
>> + /* Write back the Generic Error Status Block to guest memory */
>> + cpu_physical_memory_write(block_error_address, &block,
>> + sizeof(AcpiGenericErrorStatus));
>> +
>> + /* Fill in Generic Error Data Entry */
>> + buffer = g_malloc(sizeof(AcpiGenericErrorData) + sizeof(cper_sec_mem_err));
>> + memset(buffer, 0, sizeof(AcpiGenericErrorData) + sizeof(cper_sec_mem_err));
>> + gdata = (AcpiGenericErrorData *)buffer;
>> +
>> + memcpy(gdata->section_type, (void *) &CPER_SEC_PLATFORM_MEM,
>> + sizeof(uuid_le));
>> + gdata->error_data_length = sizeof(struct cper_sec_mem_err);
>> +
>> + mem_err = (struct cper_sec_mem_err *) (gdata + 1);
>> +
>> + /* In order to simplify simulation, hardcode the CPER section to memory
>> + * section.
>> + */
>> + mem_err->validation_bits |= CPER_MEM_VALID_ERROR_TYPE;
>> + mem_err->error_type = 3;
>> +
>> + mem_err->validation_bits |= CPER_MEM_VALID_PA;
>> + mem_err->physical_addr = error_physical_addr;
>> +
>> + mem_err->validation_bits |= CPER_MEM_VALID_CARD | CPER_MEM_VALID_MODULE |
>> + CPER_MEM_VALID_BANK | CPER_MEM_VALID_ROW |
>> + CPER_MEM_VALID_COLUMN | CPER_MEM_VALID_BIT_POSITION;
>> + mem_err->card = 1;
>> + mem_err->module = 2;
>> + mem_err->bank = 3;
>> + mem_err->row = 1;
>> + mem_err->column = 2;
>> + mem_err->bit_pos = 5;
>> +
>> + mem_err->validation_bits |= CPER_MEM_VALID_ERROR_STATUS;
>> + mem_err->error_status = 4 << 8;
>> +
>> + /* Write back the Generic Error Data Entry to guest memory */
>> + cpu_physical_memory_write(block_error_address + block_data_length, buffer,
>> + sizeof(AcpiGenericErrorData) + sizeof(cper_sec_mem_err));
>> +
>> + g_free(buffer);
>> + return BFAPEI_OK;
>> +}
>> +
>> +void ghes_build_acpi(GArray *table_data, GArray *hardware_error,
>> + BIOSLinker *linker)
>> +{
>> + Aml *hest;
>> + uint32_t address_registers_offset;
>> + AcpiTableHeader *header;
>> + AcpiGenericHardwareErrorSource *error_source;
>> + int i;
>> +
>> + int block_reqr_size = sizeof(uint64_t) + MAX_RAW_DATA_LENGTH;
>> +
>> + /* New address register and Error status block table size*/
>> + g_array_set_size(hardware_error, MAX_ERROR_SOURCE_COUNT_V6
>> + * block_reqr_size);
>> +
>> + /* Put this in a HEST table */
>> + hest = init_aml_allocator();
>> + address_registers_offset = table_data->len
>> + + sizeof(AcpiHardwareErrorSourceTable)
>> + + ERROR_STATUS_ADDRESS_OFFSET
>> + + GAS_ADDRESS_OFFSET;
>> + /* Reserve space for HEST table size*/
>> + acpi_data_push(hest->buf, sizeof(AcpiHardwareErrorSourceTable)
>> + + MAX_ERROR_SOURCE_COUNT_V6
>> + * sizeof(AcpiGenericHardwareErrorSource));
>> +
>> + g_array_append_vals(table_data, hest->buf->data, hest->buf->len);
>> + /* Allocate guest memory for the Data fw_cfg blob */
>> + bios_linker_loader_alloc(linker, GHES_ERRORS_FW_CFG_FILE,
>> + hardware_error, 4096,
>> + false /* page boundary, high memory */);
>> + header = (AcpiTableHeader *)(table_data->data
>> + + table_data->len - hest->buf->len);
>> + *(uint32_t *)(header + 1) = MAX_ERROR_SOURCE_COUNT_V6;
>> + error_source = (AcpiGenericHardwareErrorSource *)((char *)header
>> + + sizeof(AcpiHardwareErrorSourceTable));
>> +
>> + for (i = 0; i < MAX_ERROR_SOURCE_COUNT_V6; i++) {
>> + error_source->type = ACPI_HEST_TYPE_GENERIC_ERROR;
>> + error_source->source_id = 0;
>> + error_source->related_source_id = 0xffff;
>> + error_source->flags = 0;
>> + error_source->enabled = 1;
>> + error_source->number_of_records = 1;
>> + error_source->max_sections_per_record = 1;
>> + error_source->max_raw_data_length = MAX_RAW_DATA_LENGTH;
>> + error_source->error_status_address.space_id =
>> + ACPI_ADR_SPACE_SYSTEM_MEMORY;
>> + error_source->error_status_address.bit_width = 64;
>> + error_source->error_status_address.bit_offset = 0;
>> + error_source->error_status_address.access_width = 4;
>> + error_source->notify.type = i;
>> + error_source->notify.length = sizeof(AcpiGenericHardwareErrorSource);
>> +
>> + bios_linker_loader_add_pointer(linker, GHES_ERRORS_FW_CFG_FILE,
>> + sizeof(uint64_t) * i, sizeof(uint64_t),
>> + GHES_ERRORS_FW_CFG_FILE,
>> + MAX_ERROR_SOURCE_COUNT_V6 * sizeof(uint64_t) +
>> + i * MAX_RAW_DATA_LENGTH);
>> + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
>> + address_registers_offset
>> + + i * sizeof(AcpiGenericHardwareErrorSource),
>> + sizeof(uint32_t), GHES_ERRORS_FW_CFG_FILE,
>> + i * sizeof(uint64_t));
>> +
>> + bios_linker_loader_write_pointer(linker, GHES_DATA_ADDR_FW_CFG_FILE,
>> + i * sizeof(uint64_t), sizeof(uint64_t),
>> + GHES_ERRORS_FW_CFG_FILE,
>> + MAX_ERROR_SOURCE_COUNT_V6 * sizeof(uint64_t) +
>> + i * MAX_RAW_DATA_LENGTH);
>> + error_source++;
>> + }
>> +
>> + build_header(linker, table_data,
>> + (void *)header, "HEST", hest->buf->len, 1, NULL, "GHES");
>> +
>> + free_aml_allocator();
>> +}
>> +
>> +static GhesErrorState ges;
>> +void ghes_add_fw_cfg(FWCfgState *s, GArray *hardware_error)
>> +{
>> +
>> + int block_reqr_size = sizeof(uint64_t) + MAX_RAW_DATA_LENGTH;
>> + int size = MAX_ERROR_SOURCE_COUNT_V6 * block_reqr_size;
>> +
>> + /* Create a read-only fw_cfg file for GHES */
>> + fw_cfg_add_file(s, GHES_ERRORS_FW_CFG_FILE, hardware_error->data,
>> + size);
>> + /* Create a read-write fw_cfg file for Address */
>> + fw_cfg_add_file_callback(s, GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
>> + &(ges.ghes_addr_le[0]),
>> + sizeof(uint64_t) * MAX_ERROR_SOURCE_COUNT_V6,
>> + false);
>> +}
>> +
>> +void ghes_update_guest(uint32_t notify, uint64_t physical_address)
>> +{
>> + uint64_t block_error_addr;
>> +
>> + if (physical_address) {
>> + ges.physical_addr = physical_address;
>> + block_error_addr = ges.ghes_addr_le[notify];
>> + block_error_addr = le32_to_cpu(block_error_addr);
>> +
>> + /* A zero value in ghes_addr means that BIOS has not yet written
>> + * the address
>> + */
>> + if (block_error_addr) {
>> + ghes_generate_cper_record(block_error_addr, physical_address);
>> + }
>> + }
>> +}
>
>> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
>> index 00c21f1..c1d15b3 100644
>> --- a/include/hw/acpi/aml-build.h
>> +++ b/include/hw/acpi/aml-build.h
>> @@ -211,6 +211,7 @@ struct AcpiBuildTables {
>> GArray *rsdp;
>> GArray *tcpalog;
>> GArray *vmgenid;
>> + GArray *hardware_errors;
>> BIOSLinker *linker;
>> } AcpiBuildTables;
>>
>
> .
>
More information about the linux-arm-kernel
mailing list