[PATCH v2 11/11] RAS: add DeviceTree firmware-first CPER provider
Ahmed Tiba
ahmed.tiba at arm.com
Fri Feb 20 05:42:29 PST 2026
Add a DeviceTree firmware-first CPER provider that reuses the shared
GHES helpers, wire it into the RAS Kconfig/Makefile and document it in
the admin guide. Update MAINTAINERS now that the driver exists.
Signed-off-by: Ahmed Tiba <ahmed.tiba at arm.com>
---
Documentation/admin-guide/RAS/main.rst | 18 +++
MAINTAINERS | 1 +
drivers/acpi/apei/apei-internal.h | 10 +-
drivers/acpi/apei/ghes_cper.c | 2 +
drivers/ras/Kconfig | 12 ++
drivers/ras/Makefile | 1 +
drivers/ras/esource-dt.c | 264 +++++++++++++++++++++++++++++++++
include/acpi/ghes_cper.h | 9 ++
8 files changed, 308 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst
index 5a45db32c49b..4ffabaaeabb1 100644
--- a/Documentation/admin-guide/RAS/main.rst
+++ b/Documentation/admin-guide/RAS/main.rst
@@ -205,6 +205,24 @@ Architecture (MCA)\ [#f3]_.
.. [#f3] For more details about the Machine Check Architecture (MCA),
please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
+Firmware-first CPER via DeviceTree
+----------------------------------
+
+Some systems expose Common Platform Error Record (CPER) data
+via DeviceTree instead of ACPI HEST tables.
+Enable ``CONFIG_RAS_ESOURCE_DT`` to build the ``drivers/ras/esource-dt.c``
+driver and describe the CPER error source buffer with the
+``Documentation/devicetree/bindings/firmware/arm,ras-ffh.yaml`` binding.
+The driver reuses the GHES CPER helper object in
+``drivers/acpi/apei/ghes_cper.c`` so the logging, notifier chains, and
+memory failure handling match the ACPI GHES behaviour even when
+ACPI is disabled.
+
+Once a platform describes a firmware-first provider, both ACPI GHES and the
+DeviceTree driver reuse the same code paths. This keeps the behaviour
+consistent regardless of whether the error source is described via ACPI
+tables or DeviceTree.
+
EDAC - Error Detection And Correction
*************************************
diff --git a/MAINTAINERS b/MAINTAINERS
index 47db7877b485..fa6113b482b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22031,6 +22031,7 @@ RAS ERROR STATUS
M: Ahmed Tiba <ahmed.tiba at arm.com>
S: Maintained
F: Documentation/devicetree/bindings/firmware/arm,ras-ffh.yaml
+F: drivers/ras/esource-dt.c
RAS INFRASTRUCTURE
M: Tony Luck <tony.luck at intel.com>
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 77c10a7a7a9f..c16ac541f15b 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -8,6 +8,7 @@
#define APEI_INTERNAL_H
#include <linux/acpi.h>
+#include <acpi/ghes_cper.h>
struct apei_exec_context;
@@ -120,15 +121,6 @@ int apei_exec_collect_resources(struct apei_exec_context *ctx,
struct dentry;
struct dentry *apei_get_debugfs_dir(void);
-static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
-{
- if (estatus->raw_data_length)
- return estatus->raw_data_offset + \
- estatus->raw_data_length;
- else
- return sizeof(*estatus) + estatus->data_length;
-}
-
int apei_osc_setup(void);
int einj_get_available_error_type(u32 *type, int einj_action);
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index 29b790160e91..9b2d1b8cf9f4 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -42,7 +42,9 @@
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
+#ifdef CONFIG_ACPI_APEI
#include "apei-internal.h"
+#endif
ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index fc4f4bb94a4c..ea6d96713020 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -34,6 +34,18 @@ if RAS
source "arch/x86/ras/Kconfig"
source "drivers/ras/amd/atl/Kconfig"
+config RAS_ESOURCE_DT
+ bool "DeviceTree firmware-first CPER error source block provider"
+ depends on OF
+ depends on ARM64
+ select GHES_CPER_HELPERS
+ help
+ Enable support for firmware-first Common Platform Error Record (CPER)
+ error source block providers that are described via DeviceTree
+ instead of ACPI HEST tables. The driver reuses the existing GHES
+ CPER helpers so the error processing matches the ACPI code paths,
+ but it can be built even when ACPI is disabled.
+
config RAS_FMPM
tristate "FRU Memory Poison Manager"
default m
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 11f95d59d397..53558a1707b3 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_RAS) += ras.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
obj-$(CONFIG_RAS_CEC) += cec.o
+obj-$(CONFIG_RAS_ESOURCE_DT) += esource-dt.o
obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o
obj-y += amd/atl/
diff --git a/drivers/ras/esource-dt.c b/drivers/ras/esource-dt.c
new file mode 100644
index 000000000000..b575a2258536
--- /dev/null
+++ b/drivers/ras/esource-dt.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DeviceTree provider for firmware-first CPER error source block.
+ *
+ * This driver shares the GHES CPER helpers so that the reporting and
+ * notifier behaviour stays identical to ACPI GHES.
+ *
+ * Copyright (C) 2025 ARM Ltd.
+ * Author: Ahmed Tiba <ahmed.tiba at arm.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/panic.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <acpi/ghes.h>
+#include <acpi/ghes_cper.h>
+
+/* Counter incremented in ghes_ffh_probe() to fill in each header.source_id. */
+static atomic_t ghes_ffh_source_ids = ATOMIC_INIT(0);
+
+/*
+ * Optional acknowledgment register. Filled in by ghes_ffh_init_ack() and
+ * written by ghes_ffh_ack() once a record has been processed.
+ */
+struct ghes_ffh_ack {
+ /* Mapping of the device's memory resource 1. */
+ void __iomem *addr;
+ /* Mask ANDed into the value read from the register before write-back. */
+ u64 preserve;
+ /* Bits ORed into the value before it is written back. */
+ u64 set;
+ /* Access width in bits (32 or 64), chosen from the resource size. */
+ u8 width;
+ /* True once the ack resource has been found, mapped and sized. */
+ bool present;
+};
+
+/* Per-device state, allocated in ghes_ffh_probe(). */
+struct ghes_ffh {
+ /* Owning platform device's struct device. */
+ struct device *dev;
+ /* Mapping of memory resource 0, the region the records are read from. */
+ void __iomem *status;
+ /* Size of the status region in bytes (resource_size() of resource 0). */
+ size_t status_len;
+
+ /* Optional acknowledgment register state. */
+ struct ghes_ffh_ack ack;
+
+ /* Descriptor synthesized in probe and passed to the GHES helpers. */
+ struct acpi_hest_generic *generic;
+ /* Kernel-side buffer of status_len bytes that receives each snapshot. */
+ struct acpi_hest_generic_status *estatus;
+
+ /* Set from the "arm,sea-notify" property; selects the notify type. */
+ bool sync;
+ /* Interrupt number obtained in probe. */
+ int irq;
+
+ /* Serializes access to the firmware-owned buffer. */
+ spinlock_t lock;
+};
+
+/*
+ * Make sure the estatus pool exists.
+ *
+ * Returns 0 when ghes_estatus_pool is already set, otherwise the result of
+ * ghes_estatus_pool_init(1).
+ */
+static int ghes_ffh_init_pool(void)
+{
+	return ghes_estatus_pool ? 0 : ghes_estatus_pool_init(1);
+}
+
+/*
+ * Snapshot the whole status region (status_len bytes) into ctx->estatus.
+ *
+ * Always returns 0; the int return lets the caller treat it like a
+ * fallible read.
+ */
+static int ghes_ffh_copy_status(struct ghes_ffh *ctx)
+{
+	const void __iomem *src = ctx->status;
+	void *dst = ctx->estatus;
+
+	memcpy_fromio(dst, src, ctx->status_len);
+
+	return 0;
+}
+
+/*
+ * Acknowledge the current record through the optional ack register.
+ *
+ * Does nothing when no ack register was described. Otherwise performs a
+ * read-modify-write: the value read is masked with ack.preserve, ack.set is
+ * ORed in, and the result is written back using a 64-bit access when the
+ * register is 64 bits wide and a 32-bit access otherwise.
+ */
+static void ghes_ffh_ack(struct ghes_ffh *ctx)
+{
+	const struct ghes_ffh_ack *ack = &ctx->ack;
+	u64 reg;
+
+	if (!ack->present)
+		return;
+
+	if (ack->width != 64) {
+		/* Only the low 32 bits of the masks apply to the narrow access. */
+		reg = readl(ack->addr);
+		reg &= (u32)ack->preserve;
+		reg |= (u32)ack->set;
+		writel(reg, ack->addr);
+		return;
+	}
+
+	reg = readq(ack->addr);
+	reg &= ack->preserve;
+	reg |= ack->set;
+	writeq(reg, ack->addr);
+}
+
+/*
+ * Handle a fatal record: print the snapshot at KERN_EMERG, taint the kernel
+ * with TAINT_MACHINE_CHECK and panic, naming the reporting device.
+ */
+static void ghes_ffh_fatal(struct ghes_ffh *ctx)
+{
+	struct device *dev = ctx->dev;
+
+	__ghes_print_estatus(KERN_EMERG, ctx->generic, ctx->estatus);
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
+
+	panic("GHES: fatal firmware-first CPER record from %s\n", dev_name(dev));
+}
+
+/*
+ * Check that the lengths in the snapshot header stay inside the buffer.
+ *
+ * The header fields are written by firmware, while the snapshot buffer is
+ * only ctx->status_len bytes long. Returns true when the section data and
+ * the optional raw data both fit in the buffer and do not overlap the
+ * header. Sums are done in 64 bits so that large 32-bit fields cannot wrap.
+ *
+ * NOTE(review): this bounds the block as a whole; per-section length checks
+ * are presumably left to the shared CPER helpers - confirm.
+ */
+static bool ghes_ffh_status_valid(const struct ghes_ffh *ctx)
+{
+	const struct acpi_hest_generic_status *est = ctx->estatus;
+	u64 data_end = sizeof(*est) + (u64)est->data_length;
+	u64 raw_end;
+
+	/* A non-empty data area must hold at least one section header. */
+	if (est->data_length &&
+	    est->data_length < sizeof(struct acpi_hest_generic_data))
+		return false;
+
+	if (data_end > ctx->status_len)
+		return false;
+
+	if (!est->raw_data_length)
+		return true;
+
+	/* Raw data has to start after the section data and end in the buffer. */
+	if (est->raw_data_offset < data_end)
+		return false;
+
+	raw_end = (u64)est->raw_data_offset + est->raw_data_length;
+
+	return raw_end <= ctx->status_len;
+}
+
+/*
+ * Snapshot and handle the record currently held in the status region.
+ *
+ * The whole sequence runs under ctx->lock with interrupts disabled:
+ *  - copy the status region into ctx->estatus;
+ *  - return without acking when block_status is zero (nothing reported);
+ *  - drop, but still ack, a record whose lengths do not fit the buffer;
+ *  - panic via ghes_ffh_fatal() when the severity is GHES_SEV_PANIC or above;
+ *  - print the record unless an identical one is cached, and cache it when
+ *    it was printed;
+ *  - pass the record to ghes_cper_handle_status() and ack it.
+ */
+static void ghes_ffh_process(struct ghes_ffh *ctx)
+{
+	unsigned long flags;
+	int sev;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	if (ghes_ffh_copy_status(ctx))
+		goto out;
+
+	/* No error has been reported: leave the ack register alone. */
+	if (!ctx->estatus->block_status)
+		goto out;
+
+	/*
+	 * Never hand firmware-controlled lengths that exceed the snapshot to
+	 * the helpers below. Still ack so that firmware is not left waiting.
+	 */
+	if (!ghes_ffh_status_valid(ctx)) {
+		dev_warn_ratelimited(ctx->dev,
+				     "Malformed CPER status block, record dropped\n");
+		goto ack;
+	}
+
+	sev = ghes_severity(ctx->estatus->error_severity);
+	if (sev >= GHES_SEV_PANIC)
+		ghes_ffh_fatal(ctx);
+
+	if (!ghes_estatus_cached(ctx->estatus)) {
+		if (ghes_print_estatus(NULL, ctx->generic, ctx->estatus))
+			ghes_estatus_cache_add(ctx->generic, ctx->estatus);
+	}
+
+	ghes_cper_handle_status(ctx->dev, ctx->generic, ctx->estatus, ctx->sync);
+
+ack:
+	ghes_ffh_ack(ctx);
+
+out:
+	spin_unlock_irqrestore(&ctx->lock, flags);
+}
+
+/*
+ * Interrupt handler registered in ghes_ffh_probe(). @data is the
+ * struct ghes_ffh passed at request time; the record is processed and
+ * IRQ_HANDLED is always returned.
+ */
+static irqreturn_t ghes_ffh_irq(int irq, void *data)
+{
+	ghes_ffh_process(data);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set up the optional acknowledgment register from memory resource 1.
+ *
+ * Returns 0 when the resource is absent (ack stays disabled) or was set up
+ * successfully, the mapping error when devm_ioremap_resource() fails, and
+ * -EINVAL when the resource is neither 4 nor 8 bytes long.
+ */
+static int ghes_ffh_init_ack(struct platform_device *pdev,
+			     struct ghes_ffh *ctx)
+{
+	struct device *dev = &pdev->dev;
+	struct ghes_ffh_ack *ack = &ctx->ack;
+	struct resource *ack_res;
+	size_t len;
+
+	ack_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!ack_res)
+		return 0;
+
+	ack->addr = devm_ioremap_resource(dev, ack_res);
+	if (IS_ERR(ack->addr))
+		return PTR_ERR(ack->addr);
+
+	/* The resource size decides the access width used by ghes_ffh_ack(). */
+	len = resource_size(ack_res);
+	if (len == 4) {
+		ack->width = 32;
+		ack->preserve = ~0U;
+	} else if (len == 8) {
+		ack->width = 64;
+		ack->preserve = ~0ULL;
+	} else {
+		dev_err(dev, "Unsupported ack resource size %zu\n", len);
+		return -EINVAL;
+	}
+
+	ack->set = BIT_ULL(0);
+	ack->present = true;
+
+	return 0;
+}
+
+/*
+ * Bind to a firmware-first CPER provider described in DeviceTree.
+ *
+ * Maps the status region (memory resource 0) and the optional ack register
+ * (memory resource 1), makes sure the estatus pool exists, allocates the
+ * snapshot buffer and a synthesized generic descriptor, and installs a
+ * threaded interrupt handler for interrupt 0.
+ *
+ * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
+ * -EINVAL for a missing, empty or too small status region, otherwise the
+ * error reported by the failing helper (mapping, ack setup, pool setup,
+ * IRQ lookup or IRQ request).
+ */
+static int ghes_ffh_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ghes_ffh *ctx;
+	struct resource *res;
+	int rc;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	spin_lock_init(&ctx->lock);
+	ctx->dev = dev;
+	ctx->sync = of_property_read_bool(dev->of_node, "arm,sea-notify");
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "status region missing\n");
+		return -EINVAL;
+	}
+
+	ctx->status_len = resource_size(res);
+	if (!ctx->status_len) {
+		dev_err(dev, "Status region has zero length\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The snapshot buffer is status_len bytes and its header fields are
+	 * read on every interrupt, so the region must hold a full header.
+	 */
+	if (ctx->status_len < sizeof(*ctx->estatus)) {
+		dev_err(dev, "Status region too small (%zu bytes)\n",
+			ctx->status_len);
+		return -EINVAL;
+	}
+
+	ctx->status = devm_ioremap_resource(dev, res);
+	if (IS_ERR(ctx->status))
+		return PTR_ERR(ctx->status);
+
+	rc = ghes_ffh_init_ack(pdev, ctx);
+	if (rc)
+		return rc;
+
+	rc = ghes_ffh_init_pool();
+	if (rc)
+		return rc;
+
+	ctx->estatus = devm_kzalloc(dev, ctx->status_len, GFP_KERNEL);
+	if (!ctx->estatus)
+		return -ENOMEM;
+
+	ctx->generic = devm_kzalloc(dev, sizeof(*ctx->generic), GFP_KERNEL);
+	if (!ctx->generic)
+		return -ENOMEM;
+
+	ctx->generic->header.type = ACPI_HEST_TYPE_GENERIC_ERROR;
+	ctx->generic->header.source_id =
+		atomic_inc_return(&ghes_ffh_source_ids);
+	ctx->generic->notify.type = ctx->sync ?
+		ACPI_HEST_NOTIFY_SEA : ACPI_HEST_NOTIFY_EXTERNAL;
+	ctx->generic->error_block_length = ctx->status_len;
+
+	/*
+	 * The interrupt is mandatory, so use the non-optional lookup: it logs
+	 * the failure itself and its errno (including -EPROBE_DEFER) is
+	 * passed on unchanged instead of being flattened to -EINVAL.
+	 */
+	ctx->irq = platform_get_irq(pdev, 0);
+	if (ctx->irq < 0)
+		return ctx->irq;
+
+	rc = devm_request_threaded_irq(dev, ctx->irq,
+				       NULL, ghes_ffh_irq,
+				       IRQF_ONESHOT,
+				       dev_name(dev), ctx);
+	if (rc)
+		return rc;
+
+	platform_set_drvdata(pdev, ctx);
+	dev_info(dev, "Firmware-first CPER status provider (interrupt)\n");
+	return 0;
+}
+
+/*
+ * Remove callback. The body is empty: the allocations, mappings and the
+ * interrupt set up in ghes_ffh_probe() all go through devm_* helpers.
+ * NOTE(review): the pool created through ghes_estatus_pool_init() is not
+ * released here - confirm that this is intended.
+ */
+static void ghes_ffh_remove(struct platform_device *pdev)
+{
+}
+
+/* DeviceTree compatible strings this driver binds to. */
+static const struct of_device_id ghes_ffh_of_match[] = {
+ { .compatible = "arm,ras-ffh" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ghes_ffh_of_match);
+
+/* Platform driver glue: matches via ghes_ffh_of_match and wires probe/remove. */
+static struct platform_driver ghes_ffh_driver = {
+ .driver = {
+ .name = "esource-dt",
+ .of_match_table = ghes_ffh_of_match,
+ },
+ .probe = ghes_ffh_probe,
+ .remove = ghes_ffh_remove,
+};
+
+module_platform_driver(ghes_ffh_driver);
+
+MODULE_AUTHOR("Ahmed Tiba <ahmed.tiba at arm.com>");
+MODULE_DESCRIPTION("Firmware-first CPER provider for DeviceTree platforms");
+MODULE_LICENSE("GPL");
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
index f7c9fba62585..d43185c020ee 100644
--- a/include/acpi/ghes_cper.h
+++ b/include/acpi/ghes_cper.h
@@ -75,6 +75,15 @@ static inline bool is_hest_sync_notify(struct ghes *ghes)
return notify_type == ACPI_HEST_NOTIFY_SEA;
}
+/*
+ * Length in bytes of a generic error status block.
+ *
+ * When raw data is present the block ends at raw_data_offset +
+ * raw_data_length; otherwise it is the status header followed by
+ * data_length bytes of section data.
+ */
+static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
+{
+	if (!estatus->raw_data_length)
+		return sizeof(*estatus) + estatus->data_length;
+
+	return estatus->raw_data_offset + estatus->raw_data_length;
+}
+
struct ghes_vendor_record_entry {
struct work_struct work;
int error_severity;
--
2.43.0
More information about the linux-arm-kernel
mailing list