[PATCH 02/12] ras: add estatus core implementation

Thu Dec 18 07:42:42 PST 2025

On Wed, Dec 17, 2025 at 11:28:35AM +0000, Ahmed Tiba wrote:
> Add estatus.c, hook it into the EFI Makefile, and register
> the MAINTAINERS entry for the new code. The implementation provides the
> memory-pool helpers, notifier plumbing, and utility functions that the
> GHES and DeviceTree providers will reuse in later commits.
> 
> Signed-off-by: Ahmed Tiba <ahmed.tiba at arm.com>
> ---
>  MAINTAINERS                    |   1 +
>  drivers/firmware/efi/Makefile  |   1 +
>  drivers/firmware/efi/estatus.c | 560 +++++++++++++++++++++++++++++++++

If I'm understanding this patch series, you will be basically moving more than
half of the code from drivers/acpi/apei/ghes.c to drivers/firmware/efi/estatus.c:

 drivers/acpi/apei/ghes.c                   | 1292 ++-------------
 drivers/firmware/efi/estatus.c             | 1056 ++++++++++++

 $ wc drivers/acpi/apei/ghes.c drivers/firmware/efi/estatus.c -l
  894 drivers/acpi/apei/ghes.c
 1056 drivers/firmware/efi/estatus.c
 1950 total

The way you're doing of adding things first, and then removing on
separate patches is error prone, makes it hard to review and 
it becomes a lot harder to identify whose are the original authors
of the code.

This will cause undetected conflicts with already-submitted patches
that are under review.

You should instead be moving function per function as-is. Then,
adjust the code to make it more generic.

Regards,
Mauro

>  3 files changed, 562 insertions(+)
>  create mode 100644 drivers/firmware/efi/estatus.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 501b6d300aa5..67d79d4e612d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -21760,6 +21760,7 @@ F:	drivers/rapidio/
>  RAS ERROR STATUS
>  M:	Ahmed Tiba <ahmed.tiba at arm.com>
>  S:	Maintained
> +F:	drivers/firmware/efi/estatus.c
>  F:	include/linux/estatus.h
>  
>  RAS INFRASTRUCTURE
> diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
> index 8efbcf699e4f..03708d915bcf 100644
> --- a/drivers/firmware/efi/Makefile
> +++ b/drivers/firmware/efi/Makefile
> @@ -20,6 +20,7 @@ obj-$(CONFIG_EFI_PARAMS_FROM_FDT)	+= fdtparams.o
>  obj-$(CONFIG_EFI_ESRT)			+= esrt.o
>  obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
>  obj-$(CONFIG_UEFI_CPER)			+= cper.o cper_cxl.o
> +obj-$(CONFIG_RAS_ESTATUS_CORE)		+= estatus.o
>  obj-$(CONFIG_EFI_RUNTIME_WRAPPERS)	+= runtime-wrappers.o
>  subdir-$(CONFIG_EFI_STUB)		+= libstub
>  obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
> diff --git a/drivers/firmware/efi/estatus.c b/drivers/firmware/efi/estatus.c
> new file mode 100644
> index 000000000000..8dae5c73ce27
> --- /dev/null
> +++ b/drivers/firmware/efi/estatus.c
> @@ -0,0 +1,560 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Firmware-first RAS: Generic Error Status Core
> + *
> + * Copyright (C) 2025 ARM Ltd.
> + * Author: Ahmed Tiba <ahmed.tiba at arm.com>
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/cper.h>
> +#include <linux/ratelimit.h>
> +#include <linux/vmalloc.h>
> +#include <linux/llist.h>
> +#include <linux/genalloc.h>
> +#include <linux/pci.h>
> +#include <linux/pfn.h>
> +#include <linux/aer.h>
> +#include <linux/nmi.h>
> +#include <linux/sched/clock.h>
> +#include <linux/uuid.h>
> +#include <linux/kconfig.h>
> +#include <linux/ras.h>
> +#include <linux/mutex.h>
> +#include <linux/notifier.h>
> +#include <linux/workqueue.h>
> +#include <linux/task_work.h>
> +#include <ras/ras_event.h>
> +
> +#include <linux/estatus.h>
> +#include <asm/fixmap.h>
> +
> +void estatus_pool_region_free(unsigned long addr, u32 size);
> +
> +static void estatus_log_hw_error(char level, const char *seq_tag,
> +				 const char *name)
> +{
> +	switch (level) {
> +	case '0':
> +		pr_emerg("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	case '1':
> +		pr_alert("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	case '2':
> +		pr_crit("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	case '3':
> +		pr_err("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	case '4':
> +		pr_warn("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	case '5':
> +		pr_notice("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	case '6':
> +		pr_info("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	default:
> +		pr_debug("%sHardware error from %s\n", seq_tag, name);
> +		break;
> +	}
> +}
> +
> +static inline u32 estatus_len(struct acpi_hest_generic_status *estatus)
> +{
> +	if (estatus->raw_data_length)
> +		return estatus->raw_data_offset + estatus->raw_data_length;
> +
> +	return sizeof(*estatus) + estatus->data_length;
> +}
> +
> +#define ESTATUS_PFX	"ESTATUS: "
> +
> +#define ESTATUS_ESOURCE_PREALLOC_MAX_SIZE_SIZE	65536
> +
> +#define ESTATUS_POOL_MIN_ALLOC_ORDER 3
> +
> +/* This is just an estimation for memory pool allocation */
> +#define ESTATUS_CACHE_AVG_SIZE	512
> +
> +#define ESTATUS_CACHES_SIZE	4
> +
> +#define ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
> +/* Prevent too many caches are allocated because of RCU */
> +#define ESTATUS_CACHE_ALLOCED_MAX	(ESTATUS_CACHES_SIZE * 3 / 2)
> +
> +#define ESTATUS_CACHE_LEN(estatus_len)			\
> +	(sizeof(struct estatus_cache) + (estatus_len))
> +#define ESTATUS_FROM_CACHE(cache)			\
> +	((struct acpi_hest_generic_status *)		\
> +	 ((struct estatus_cache *)(cache) + 1))
> +
> +#define ESTATUS_NODE_LEN(estatus_len)			\
> +	(sizeof(struct estatus_node) + (estatus_len))
> +#define ESTATUS_FROM_NODE(node)				\
> +	((struct acpi_hest_generic_status *)		\
> +	 ((struct estatus_node *)(node) + 1))
> +
> +#define ESTATUS_VENDOR_ENTRY_LEN(gdata_len)		\
> +	(sizeof(struct estatus_vendor_record_entry) + (gdata_len))
> +#define ESTATUS_GDATA_FROM_VENDOR_ENTRY(vendor_entry)	\
> +	((struct acpi_hest_generic_data *)		\
> +	((struct estatus_vendor_record_entry *)(vendor_entry) + 1))
> +
> +static ATOMIC_NOTIFIER_HEAD(estatus_report_chain);
> +
> +struct estatus_vendor_record_entry {
> +	struct work_struct work;
> +	int error_severity;
> +	char vendor_record[];
> +};
> +
> +static struct estatus_cache __rcu *estatus_caches[ESTATUS_CACHES_SIZE];
> +static atomic_t estatus_cache_alloced;
> +
> +static int estatus_panic_timeout __read_mostly = 30;
> +
> +static struct gen_pool *estatus_pool;
> +static DEFINE_MUTEX(estatus_pool_mutex);
> +
> +static inline const char *estatus_source_name(struct estatus_source *source)
> +{
> +	if (source->ops && source->ops->get_name)
> +		return source->ops->get_name(source);
> +
> +	return "unknown";
> +}
> +
> +static inline size_t estatus_source_max_len(struct estatus_source *source)
> +{
> +	if (source->ops && source->ops->get_max_len)
> +		return source->ops->get_max_len(source);
> +
> +	return 0;
> +}
> +
> +static inline enum estatus_notify_mode
> +estatus_source_notify_mode(struct estatus_source *source)
> +{
> +	if (source->ops && source->ops->get_notify_mode)
> +		return source->ops->get_notify_mode(source);
> +
> +	return ESTATUS_NOTIFY_ASYNC;
> +}
> +
> +static inline int estatus_source_get_phys(struct estatus_source *source,
> +					  phys_addr_t *addr)
> +{
> +	if (!source->ops || !source->ops->get_phys)
> +		return -EOPNOTSUPP;
> +
> +	return source->ops->get_phys(source, addr);
> +}
> +
> +static inline int estatus_source_read(struct estatus_source *source,
> +				      phys_addr_t addr, void *buf, size_t len,
> +				      enum fixed_addresses fixmap_idx)
> +{
> +	if (!source->ops || !source->ops->read)
> +		return -EOPNOTSUPP;
> +
> +	return source->ops->read(source, addr, buf, len, fixmap_idx);
> +}
> +
> +static inline int estatus_source_write(struct estatus_source *source,
> +				       phys_addr_t addr, const void *buf,
> +				       size_t len,
> +				       enum fixed_addresses fixmap_idx)
> +{
> +	if (!source->ops || !source->ops->write)
> +		return -EOPNOTSUPP;
> +
> +	return source->ops->write(source, addr, buf, len, fixmap_idx);
> +}
> +
> +static inline void estatus_source_ack(struct estatus_source *source)
> +{
> +	if (source->ops && source->ops->ack)
> +		source->ops->ack(source);
> +}
> +
> +int estatus_pool_init(unsigned int num_ghes)
> +{
> +	unsigned long addr, len;
> +	int rc = 0;
> +
> +	mutex_lock(&estatus_pool_mutex);
> +	if (estatus_pool)
> +		goto out_unlock;
> +
> +	estatus_pool = gen_pool_create(ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
> +	if (!estatus_pool) {
> +		rc = -ENOMEM;
> +		goto out_unlock;
> +	}
> +
> +	if (!num_ghes)
> +		num_ghes = 1;
> +
> +	len = ESTATUS_CACHE_AVG_SIZE * ESTATUS_CACHE_ALLOCED_MAX;
> +	len += (num_ghes * ESTATUS_ESOURCE_PREALLOC_MAX_SIZE_SIZE);
> +
> +	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
> +	if (!addr) {
> +		rc = -ENOMEM;
> +		goto err_pool_alloc;
> +	}
> +
> +	rc = gen_pool_add(estatus_pool, addr, PAGE_ALIGN(len), -1);
> +	if (rc)
> +		goto err_pool_add;
> +
> +out_unlock:
> +	mutex_unlock(&estatus_pool_mutex);
> +	return rc;
> +
> +err_pool_add:
> +	vfree((void *)addr);
> +err_pool_alloc:
> +	gen_pool_destroy(estatus_pool);
> +	estatus_pool = NULL;
> +	goto out_unlock;
> +}
> +
> +/**
> + * estatus_pool_region_free - free previously allocated memory
> + *				   from the estatus_pool.
> + * @addr: address of memory to free.
> + * @size: size of memory to free.
> + *
> + * Returns none.
> + */
> +void estatus_pool_region_free(unsigned long addr, u32 size)
> +{
> +	gen_pool_free(estatus_pool, addr, size);
> +}
> +EXPORT_SYMBOL_GPL(estatus_pool_region_free);
> +
> +/* Check the top-level record header has an appropriate size. */
> +static int __estatus_check_estatus(struct estatus_source *source,
> +				   struct acpi_hest_generic_status *estatus)
> +{
> +	u32 len = estatus_len(estatus);
> +	size_t max_len = estatus_source_max_len(source);
> +
> +	if (len < sizeof(*estatus)) {
> +		pr_warn_ratelimited(FW_WARN ESTATUS_PFX "Truncated error status block!\n");
> +		return -EIO;
> +	}
> +
> +	if (max_len && len > max_len) {
> +		pr_warn_ratelimited(FW_WARN ESTATUS_PFX "Invalid error status block length!\n");
> +		return -EIO;
> +	}
> +
> +	if (cper_estatus_check_header(estatus)) {
> +		pr_warn_ratelimited(FW_WARN ESTATUS_PFX "Invalid CPER header!\n");
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +/* Read the CPER block, returning its address, and header in estatus. */
> +static int __estatus_peek_estatus(struct estatus_source *source,
> +				  struct acpi_hest_generic_status *estatus,
> +				  phys_addr_t *buf_paddr,
> +				  enum fixed_addresses fixmap_idx)
> +{
> +	int rc;
> +
> +	rc = estatus_source_get_phys(source, buf_paddr);
> +	if (rc) {
> +		*buf_paddr = 0;
> +		pr_warn_ratelimited(FW_WARN ESTATUS_PFX
> +				    "Failed to get error status block address for provider %s: %d\n",
> +				    estatus_source_name(source), rc);
> +		return rc;
> +	}
> +
> +	if (!*buf_paddr)
> +		return -ENOENT;
> +
> +	rc = estatus_source_read(source, *buf_paddr, estatus,
> +				 sizeof(*estatus), fixmap_idx);
> +	if (rc)
> +		return rc;
> +
> +	if (!estatus->block_status) {
> +		*buf_paddr = 0;
> +		return -ENOENT;
> +	}
> +
> +	return 0;
> +}
> +
> +static int __estatus_read_estatus(struct estatus_source *source,
> +				  struct acpi_hest_generic_status *estatus,
> +				  phys_addr_t buf_paddr,
> +				  enum fixed_addresses fixmap_idx,
> +				  size_t buf_len)
> +{
> +	int rc;
> +
> +	rc = estatus_source_read(source, buf_paddr, estatus, buf_len,
> +				 fixmap_idx);
> +	if (rc)
> +		return rc;
> +
> +	if (cper_estatus_check(estatus)) {
> +		pr_warn_ratelimited(FW_WARN ESTATUS_PFX
> +				    "Failed to read error status block for provider %s!\n",
> +				    estatus_source_name(source));
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static int estatus_read_estatus(struct estatus_source *source,
> +				struct acpi_hest_generic_status *estatus,
> +				phys_addr_t *buf_paddr,
> +				enum fixed_addresses fixmap_idx)
> +{
> +	int rc;
> +
> +	rc = __estatus_peek_estatus(source, estatus, buf_paddr, fixmap_idx);
> +	if (rc)
> +		return rc;
> +
> +	rc = __estatus_check_estatus(source, estatus);
> +	if (rc)
> +		return rc;
> +
> +	return __estatus_read_estatus(source, estatus, *buf_paddr,
> +				      fixmap_idx, estatus_len(estatus));
> +}
> +
> +static void estatus_clear_estatus(struct estatus_source *source,
> +				  struct acpi_hest_generic_status *estatus,
> +				  phys_addr_t buf_paddr,
> +				  enum fixed_addresses fixmap_idx)
> +{
> +	int rc;
> +
> +	estatus->block_status = 0;
> +
> +	if (!buf_paddr)
> +		return;
> +
> +	rc = estatus_source_write(source, buf_paddr, estatus,
> +				  sizeof(estatus->block_status), fixmap_idx);
> +	if (rc)
> +		pr_warn_ratelimited(FW_WARN ESTATUS_PFX
> +				    "Failed to clear error status block for provider %s: %d\n",
> +				    estatus_source_name(source), rc);
> +
> +	estatus_source_ack(source);
> +}
> +
> +static inline int estatus_severity(int severity)
> +{
> +	switch (severity) {
> +	case CPER_SEV_INFORMATIONAL:
> +		return ESTATUS_SEV_NO;
> +	case CPER_SEV_CORRECTED:
> +		return ESTATUS_SEV_CORRECTED;
> +	case CPER_SEV_RECOVERABLE:
> +		return ESTATUS_SEV_RECOVERABLE;
> +	case CPER_SEV_FATAL:
> +		return ESTATUS_SEV_PANIC;
> +	default:
> +		/* Unknown, go panic */
> +		return ESTATUS_SEV_PANIC;
> +	}
> +}
> +
> +static void __estatus_print_estatus(const char *pfx,
> +				    struct estatus_source *source,
> +				    const struct acpi_hest_generic_status *estatus)
> +{
> +	static atomic_t seqno;
> +	unsigned int curr_seqno;
> +	char pfx_seq[64];
> +	char seq_tag[64];
> +	const char *name = estatus_source_name(source);
> +	const char *level = pfx;
> +	char level_char = '4';
> +
> +	if (!level) {
> +		if (estatus_severity(estatus->error_severity) <=
> +		    ESTATUS_SEV_CORRECTED)
> +			level = KERN_WARNING;
> +		else
> +			level = KERN_ERR;
> +	}
> +
> +	if (level[0] == KERN_SOH_ASCII && level[1])
> +		level_char = level[1];
> +	else if (estatus_severity(estatus->error_severity) > ESTATUS_SEV_CORRECTED)
> +		level_char = '3';
> +
> +	curr_seqno = atomic_inc_return(&seqno);
> +	snprintf(seq_tag, sizeof(seq_tag), "{%u}" HW_ERR, curr_seqno);
> +	snprintf(pfx_seq, sizeof(pfx_seq), "%s%s", level, seq_tag);
> +	estatus_log_hw_error(level_char, seq_tag, name);
> +	cper_estatus_print(pfx_seq, estatus);
> +}
> +
> +static int estatus_print_estatus(const char *pfx,
> +				 struct estatus_source *source,
> +				 const struct acpi_hest_generic_status *estatus)
> +{
> +	/* Not more than 2 messages every 5 seconds */
> +	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5 * HZ, 2);
> +	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5 * HZ, 2);
> +	struct ratelimit_state *ratelimit;
> +
> +	if (estatus_severity(estatus->error_severity) <= ESTATUS_SEV_CORRECTED)
> +		ratelimit = &ratelimit_corrected;
> +	else
> +		ratelimit = &ratelimit_uncorrected;
> +	if (__ratelimit(ratelimit)) {
> +		__estatus_print_estatus(pfx, source, estatus);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * GHES error status reporting throttle, to report more kinds of
> + * errors, instead of just most frequently occurred errors.
> + */
> +static int estatus_cached(struct acpi_hest_generic_status *estatus)
> +{
> +	u32 len;
> +	int i, cached = 0;
> +	unsigned long long now;
> +	struct estatus_cache *cache;
> +	struct acpi_hest_generic_status *cache_estatus;
> +
> +	len = estatus_len(estatus);
> +	rcu_read_lock();
> +	for (i = 0; i < ESTATUS_CACHES_SIZE; i++) {
> +		cache = rcu_dereference(estatus_caches[i]);
> +		if (!cache)
> +			continue;
> +		if (len != cache->estatus_len)
> +			continue;
> +		cache_estatus = ESTATUS_FROM_CACHE(cache);
> +		if (memcmp(estatus, cache_estatus, len))
> +			continue;
> +		atomic_inc(&cache->count);
> +		now = sched_clock();
> +		if (now - cache->time_in < ESTATUS_IN_CACHE_MAX_NSEC)
> +			cached = 1;
> +		break;
> +	}
> +	rcu_read_unlock();
> +	return cached;
> +}
> +
> +static struct estatus_cache *estatus_cache_alloc(struct estatus_source *source,
> +						 struct acpi_hest_generic_status *estatus)
> +{
> +	int alloced;
> +	u32 len, cache_len;
> +	struct estatus_cache *cache;
> +	struct acpi_hest_generic_status *cache_estatus;
> +
> +	alloced = atomic_add_return(1, &estatus_cache_alloced);
> +	if (alloced > ESTATUS_CACHE_ALLOCED_MAX) {
> +		atomic_dec(&estatus_cache_alloced);
> +		return NULL;
> +	}
> +	len = estatus_len(estatus);
> +	cache_len = ESTATUS_CACHE_LEN(len);
> +	cache = (void *)gen_pool_alloc(estatus_pool, cache_len);
> +	if (!cache) {
> +		atomic_dec(&estatus_cache_alloced);
> +		return NULL;
> +	}
> +	cache_estatus = ESTATUS_FROM_CACHE(cache);
> +	memcpy(cache_estatus, estatus, len);
> +	cache->estatus_len = len;
> +	atomic_set(&cache->count, 0);
> +	cache->source = source;
> +	cache->time_in = sched_clock();
> +	return cache;
> +}
> +
> +static void estatus_cache_rcu_free(struct rcu_head *head)
> +{
> +	struct estatus_cache *cache;
> +	u32 len;
> +
> +	cache = container_of(head, struct estatus_cache, rcu);
> +	len = estatus_len(ESTATUS_FROM_CACHE(cache));
> +	len = ESTATUS_CACHE_LEN(len);
> +	gen_pool_free(estatus_pool, (unsigned long)cache, len);
> +	atomic_dec(&estatus_cache_alloced);
> +}
> +
> +static void estatus_cache_add(struct estatus_source *source,
> +			      struct acpi_hest_generic_status *estatus)
> +{
> +	unsigned long long now, duration, period, max_period = 0;
> +	struct estatus_cache *cache, *new_cache;
> +	struct estatus_cache __rcu *victim;
> +	int i, slot = -1, count;
> +
> +	new_cache = estatus_cache_alloc(source, estatus);
> +	if (!new_cache)
> +		return;
> +
> +	rcu_read_lock();
> +	now = sched_clock();
> +	for (i = 0; i < ESTATUS_CACHES_SIZE; i++) {
> +		cache = rcu_dereference(estatus_caches[i]);
> +		if (!cache) {
> +			slot = i;
> +			break;
> +		}
> +		duration = now - cache->time_in;
> +		if (duration >= ESTATUS_IN_CACHE_MAX_NSEC) {
> +			slot = i;
> +			break;
> +		}
> +		count = atomic_read(&cache->count);
> +		period = duration;
> +		do_div(period, (count + 1));
> +		if (period > max_period) {
> +			max_period = period;
> +			slot = i;
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	if (slot != -1) {
> +		/*
> +		 * Use release semantics to ensure that estatus_cached()
> +		 * running on another CPU will see the updated cache fields if
> +		 * it can see the new value of the pointer.
> +		 */
> +		victim = xchg_release(&estatus_caches[slot],
> +				      RCU_INITIALIZER(new_cache));
> +
> +		/*
> +		 * At this point, victim may point to a cached item different
> +		 * from the one based on which we selected the slot. Instead of
> +		 * going to the loop again to pick another slot, let's just
> +		 * drop the other item anyway: this may cause a false cache
> +		 * miss later on, but that won't cause any problems.
> +		 */
> +		if (victim)
> +			call_rcu(&unrcu_pointer(victim)->rcu,
> +				 estatus_cache_rcu_free);
> +	}
> +}
> -- 
> 2.43.0
> 

-- 
Thanks,
Mauro