[PATCH v2 11/11] RAS: add DeviceTree firmware-first CPER provider

Himanshu Chauhan himanshu.chauhan at oss.qualcomm.com
Wed Feb 25 23:01:46 PST 2026


On Fri, Feb 20, 2026 at 7:13 PM Ahmed Tiba <ahmed.tiba at arm.com> wrote:
>
> Add a DeviceTree firmware-first CPER provider that reuses the shared
> GHES helpers, wire it into the RAS Kconfig/Makefile and document it in
> the admin guide. Update MAINTAINERS now that the driver exists.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba at arm.com>
> ---
>  Documentation/admin-guide/RAS/main.rst |  18 +++
>  MAINTAINERS                            |   1 +
>  drivers/acpi/apei/apei-internal.h      |  10 +-
>  drivers/acpi/apei/ghes_cper.c          |   2 +
>  drivers/ras/Kconfig                    |  12 ++
>  drivers/ras/Makefile                   |   1 +
>  drivers/ras/esource-dt.c               | 264 +++++++++++++++++++++++++++++++++
>  include/acpi/ghes_cper.h               |   9 ++
>  8 files changed, 308 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst
> index 5a45db32c49b..4ffabaaeabb1 100644
> --- a/Documentation/admin-guide/RAS/main.rst
> +++ b/Documentation/admin-guide/RAS/main.rst
> @@ -205,6 +205,24 @@ Architecture (MCA)\ [#f3]_.
>  .. [#f3] For more details about the Machine Check Architecture (MCA),
>    please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
>
> +Firmware-first CPER via DeviceTree
> +----------------------------------
> +
> +Some systems expose Common Platform Error Record (CPER) data
> +via DeviceTree instead of ACPI HEST tables.
> +Enable ``CONFIG_RAS_ESOURCE_DT`` to build the ``drivers/ras/esource-dt.c``
> +driver and describe the CPER error source buffer with the
> +``Documentation/devicetree/bindings/firmware/arm,ras-ffh.yaml`` binding.
> +The driver reuses the GHES CPER helper object in
> +``drivers/acpi/apei/ghes_cper.c`` so the logging, notifier chains, and
> +memory failure handling match the ACPI GHES behaviour even when
> +ACPI is disabled.
> +
> +Once a platform describes a firmware-first provider, both ACPI GHES and the
> +DeviceTree driver reuse the same code paths. This keeps the behaviour
> +consistent regardless of whether the error source is described via ACPI
> +tables or DeviceTree.
> +
>  EDAC - Error Detection And Correction
>  *************************************
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 47db7877b485..fa6113b482b7 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22031,6 +22031,7 @@ RAS ERROR STATUS
>  M:     Ahmed Tiba <ahmed.tiba at arm.com>
>  S:     Maintained
>  F:     Documentation/devicetree/bindings/firmware/arm,ras-ffh.yaml
> +F:     drivers/ras/esource-dt.c
>
>  RAS INFRASTRUCTURE
>  M:     Tony Luck <tony.luck at intel.com>
> diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
> index 77c10a7a7a9f..c16ac541f15b 100644
> --- a/drivers/acpi/apei/apei-internal.h
> +++ b/drivers/acpi/apei/apei-internal.h
> @@ -8,6 +8,7 @@
>  #define APEI_INTERNAL_H
>
>  #include <linux/acpi.h>
> +#include <acpi/ghes_cper.h>
>
>  struct apei_exec_context;
>
> @@ -120,15 +121,6 @@ int apei_exec_collect_resources(struct apei_exec_context *ctx,
>  struct dentry;
>  struct dentry *apei_get_debugfs_dir(void);
>
> -static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
> -{
> -       if (estatus->raw_data_length)
> -               return estatus->raw_data_offset + \
> -                       estatus->raw_data_length;
> -       else
> -               return sizeof(*estatus) + estatus->data_length;
> -}
> -
>  int apei_osc_setup(void);
>
>  int einj_get_available_error_type(u32 *type, int einj_action);
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> index 29b790160e91..9b2d1b8cf9f4 100644
> --- a/drivers/acpi/apei/ghes_cper.c
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -42,7 +42,9 @@
>  #include <asm/fixmap.h>
>  #include <asm/tlbflush.h>
>
> +#ifdef CONFIG_ACPI_APEI
>  #include "apei-internal.h"
> +#endif
>
>  ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
>
> diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
> index fc4f4bb94a4c..ea6d96713020 100644
> --- a/drivers/ras/Kconfig
> +++ b/drivers/ras/Kconfig
> @@ -34,6 +34,18 @@ if RAS
>  source "arch/x86/ras/Kconfig"
>  source "drivers/ras/amd/atl/Kconfig"
>
> +config RAS_ESOURCE_DT
> +       bool "DeviceTree firmware-first CPER error source block provider"
> +       depends on OF
> +       depends on ARM64
> +       select GHES_CPER_HELPERS
> +       help
> +         Enable support for firmware-first Common Platform Error Record (CPER)
> +         error source block providers that are described via DeviceTree
> +         instead of ACPI HEST tables. The driver reuses the existing GHES
> +         CPER helpers so the error processing matches the ACPI code paths,
> +         but it can be built even when ACPI is disabled.
> +
>  config RAS_FMPM
>         tristate "FRU Memory Poison Manager"
>         default m
> diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
> index 11f95d59d397..53558a1707b3 100644
> --- a/drivers/ras/Makefile
> +++ b/drivers/ras/Makefile
> @@ -2,6 +2,7 @@
>  obj-$(CONFIG_RAS)      += ras.o
>  obj-$(CONFIG_DEBUG_FS) += debugfs.o
>  obj-$(CONFIG_RAS_CEC)  += cec.o
> +obj-$(CONFIG_RAS_ESOURCE_DT)   += esource-dt.o
>
>  obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o
>  obj-y                  += amd/atl/
> diff --git a/drivers/ras/esource-dt.c b/drivers/ras/esource-dt.c
> new file mode 100644
> index 000000000000..b575a2258536
> --- /dev/null
> +++ b/drivers/ras/esource-dt.c
> @@ -0,0 +1,264 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * DeviceTree provider for firmware-first CPER error source block.
> + *
> + * This driver shares the GHES CPER helpers so we keep the reporting and
> + * notifier behaviour identical to ACPI GHES
> + *
> + * Copyright (C) 2025 ARM Ltd.
> + * Author: Ahmed Tiba <ahmed.tiba at arm.com>
> + */
> +
> +#include <linux/atomic.h>
> +#include <linux/bitops.h>
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/io-64-nonatomic-lo-hi.h>
> +#include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/panic.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +#include <acpi/ghes.h>
> +#include <acpi/ghes_cper.h>
> +
> +static atomic_t ghes_ffh_source_ids = ATOMIC_INIT(0);
> +
> +struct ghes_ffh_ack {
> +       void __iomem *addr;
> +       u64 preserve;
> +       u64 set;
> +       u8 width;
> +       bool present;
> +};

Please don't use "ffh" in these names. In ACPI, FFH stands for Functional
Fixed Hardware, so reusing the abbreviation here is confusing. Per the ACPI
specification, FFH can be used for register reads/writes while handling errors.
I am starting to feel that all this churn should be avoided. All of the
GHES code is also being moved around in the name of CPER helpers.

> +
> +struct ghes_ffh {
> +       struct device *dev;
> +       void __iomem *status;
> +       size_t status_len;
> +
> +       struct ghes_ffh_ack ack;
> +
> +       struct acpi_hest_generic *generic;
> +       struct acpi_hest_generic_status *estatus;
> +
> +       bool sync;
> +       int irq;
> +
> +       /* Serializes access to the firmware-owned buffer. */
> +       spinlock_t lock;
> +};
> +
> +static int ghes_ffh_init_pool(void)
> +{
> +       if (ghes_estatus_pool)
> +               return 0;
> +
> +       return ghes_estatus_pool_init(1);
> +}
> +
> +static int ghes_ffh_copy_status(struct ghes_ffh *ctx)
> +{
> +       memcpy_fromio(ctx->estatus, ctx->status, ctx->status_len);
> +       return 0;
> +}
> +
> +static void ghes_ffh_ack(struct ghes_ffh *ctx)
> +{
> +       u64 val;
> +
> +       if (!ctx->ack.present)
> +               return;
> +
> +       if (ctx->ack.width == 64) {
> +               val = readq(ctx->ack.addr);
> +               val &= ctx->ack.preserve;
> +               val |= ctx->ack.set;
> +               writeq(val, ctx->ack.addr);
> +       } else {
> +               val = readl(ctx->ack.addr);
> +               val &= (u32)ctx->ack.preserve;
> +               val |= (u32)ctx->ack.set;
> +               writel(val, ctx->ack.addr);
> +       }
> +}
> +
> +static void ghes_ffh_fatal(struct ghes_ffh *ctx)
> +{
> +       __ghes_print_estatus(KERN_EMERG, ctx->generic, ctx->estatus);
> +       add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
> +       panic("GHES: fatal firmware-first CPER record from %s\n",
> +             dev_name(ctx->dev));
> +}
> +
> +static void ghes_ffh_process(struct ghes_ffh *ctx)
> +{
> +       unsigned long flags;
> +       int sev;
> +
> +       spin_lock_irqsave(&ctx->lock, flags);
> +
> +       if (ghes_ffh_copy_status(ctx))
> +               goto out;
> +
> +       sev = ghes_severity(ctx->estatus->error_severity);
> +       if (sev >= GHES_SEV_PANIC)
> +               ghes_ffh_fatal(ctx);
> +
> +       if (!ghes_estatus_cached(ctx->estatus)) {
> +               if (ghes_print_estatus(NULL, ctx->generic, ctx->estatus))
> +                       ghes_estatus_cache_add(ctx->generic, ctx->estatus);
> +       }
> +
> +       ghes_cper_handle_status(ctx->dev, ctx->generic, ctx->estatus, ctx->sync);
> +
> +       ghes_ffh_ack(ctx);
> +
> +out:
> +       spin_unlock_irqrestore(&ctx->lock, flags);
> +}
> +
> +static irqreturn_t ghes_ffh_irq(int irq, void *data)
> +{
> +       struct ghes_ffh *ctx = data;
> +
> +       ghes_ffh_process(ctx);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +static int ghes_ffh_init_ack(struct platform_device *pdev,
> +                            struct ghes_ffh *ctx)
> +{
> +       struct resource *res;
> +       size_t size;
> +
> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +       if (!res)
> +               return 0;
> +
> +       ctx->ack.addr = devm_ioremap_resource(&pdev->dev, res);
> +       if (IS_ERR(ctx->ack.addr))
> +               return PTR_ERR(ctx->ack.addr);
> +
> +       size = resource_size(res);
> +       switch (size) {
> +       case 4:
> +               ctx->ack.width = 32;
> +               ctx->ack.preserve = ~0U;
> +               break;
> +       case 8:
> +               ctx->ack.width = 64;
> +               ctx->ack.preserve = ~0ULL;
> +               break;
> +       default:
> +               dev_err(&pdev->dev, "Unsupported ack resource size %zu\n", size);
> +               return -EINVAL;
> +       }
> +
> +       ctx->ack.set = BIT_ULL(0);
> +       ctx->ack.present = true;
> +       return 0;
> +}
> +
> +static int ghes_ffh_probe(struct platform_device *pdev)
> +{
> +       struct ghes_ffh *ctx;
> +       struct resource *res;
> +       int rc;
> +
> +       ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
> +       if (!ctx)
> +               return -ENOMEM;
> +
> +       spin_lock_init(&ctx->lock);
> +       ctx->dev = &pdev->dev;
> +       ctx->sync = of_property_read_bool(pdev->dev.of_node, "arm,sea-notify");
> +
> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +       if (!res) {
> +               dev_err(&pdev->dev, "status region missing\n");
> +               return -EINVAL;
> +       }
> +
> +       ctx->status_len = resource_size(res);
> +       if (!ctx->status_len) {
> +               dev_err(&pdev->dev, "Status region has zero length\n");
> +               return -EINVAL;
> +       }
> +
> +       ctx->status = devm_ioremap_resource(&pdev->dev, res);
> +       if (IS_ERR(ctx->status))
> +               return PTR_ERR(ctx->status);
> +
> +       rc = ghes_ffh_init_ack(pdev, ctx);
> +       if (rc)
> +               return rc;
> +
> +       rc = ghes_ffh_init_pool();
> +       if (rc)
> +               return rc;
> +
> +       ctx->estatus = devm_kzalloc(&pdev->dev, ctx->status_len, GFP_KERNEL);
> +       if (!ctx->estatus)
> +               return -ENOMEM;
> +
> +       ctx->generic = devm_kzalloc(&pdev->dev, sizeof(*ctx->generic), GFP_KERNEL);
> +       if (!ctx->generic)
> +               return -ENOMEM;
> +
> +       ctx->generic->header.type = ACPI_HEST_TYPE_GENERIC_ERROR;
> +       ctx->generic->header.source_id =
> +               atomic_inc_return(&ghes_ffh_source_ids);
> +       ctx->generic->notify.type = ctx->sync ?
> +               ACPI_HEST_NOTIFY_SEA : ACPI_HEST_NOTIFY_EXTERNAL;
> +       ctx->generic->error_block_length = ctx->status_len;
> +
> +       ctx->irq = platform_get_irq_optional(pdev, 0);
> +       if (ctx->irq <= 0) {
> +               if (ctx->irq == -EPROBE_DEFER)
> +                       return ctx->irq;
> +               dev_err(&pdev->dev, "interrupt is required (%d)\n", ctx->irq);
> +               return -EINVAL;
> +       }
> +
> +       rc = devm_request_threaded_irq(&pdev->dev, ctx->irq,
> +                                      NULL, ghes_ffh_irq,
> +                                      IRQF_ONESHOT,
> +                                      dev_name(&pdev->dev), ctx);
> +       if (rc)
> +               return rc;
> +
> +       platform_set_drvdata(pdev, ctx);
> +       dev_info(&pdev->dev, "Firmware-first CPER status provider (interrupt)\n");
> +       return 0;
> +}
> +
> +static void ghes_ffh_remove(struct platform_device *pdev)
> +{
> +}
> +
> +static const struct of_device_id ghes_ffh_of_match[] = {
> +       { .compatible = "arm,ras-ffh" },
> +       { /* sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(of, ghes_ffh_of_match);
> +
> +static struct platform_driver ghes_ffh_driver = {
> +       .driver = {
> +               .name = "esource-dt",
> +               .of_match_table = ghes_ffh_of_match,
> +       },
> +       .probe = ghes_ffh_probe,
> +       .remove = ghes_ffh_remove,
> +};
> +
> +module_platform_driver(ghes_ffh_driver);
> +
> +MODULE_AUTHOR("Ahmed Tiba <ahmed.tiba at arm.com>");
> +MODULE_DESCRIPTION("Firmware-first CPER provider for DeviceTree platforms");
> +MODULE_LICENSE("GPL");
> diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
> index f7c9fba62585..d43185c020ee 100644
> --- a/include/acpi/ghes_cper.h
> +++ b/include/acpi/ghes_cper.h
> @@ -75,6 +75,15 @@ static inline bool is_hest_sync_notify(struct ghes *ghes)
>         return notify_type == ACPI_HEST_NOTIFY_SEA;
>  }
>
> +static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
> +{
> +       if (estatus->raw_data_length)
> +               return estatus->raw_data_offset + \
> +                       estatus->raw_data_length;
> +       else
> +               return sizeof(*estatus) + estatus->data_length;
> +}
> +
>  struct ghes_vendor_record_entry {
>         struct work_struct work;
>         int error_severity;
>
> --
> 2.43.0
>
>



More information about the linux-arm-kernel mailing list