[PATCH 02/12] ras: add estatus core implementation

Ahmed Tiba ahmed.tiba at arm.com
Wed Dec 17 03:28:35 PST 2025


Add estatus.c, build it from the EFI Makefile when CONFIG_RAS_ESTATUS_CORE
is enabled, and list the new file under the existing RAS ERROR STATUS entry
in MAINTAINERS. The implementation provides the memory-pool helpers, the
notifier head, the error status block read/clear/print helpers, and the
error status cache that the GHES and DeviceTree providers will reuse in
later commits.

Signed-off-by: Ahmed Tiba <ahmed.tiba at arm.com>
---
 MAINTAINERS                    |   1 +
 drivers/firmware/efi/Makefile  |   1 +
 drivers/firmware/efi/estatus.c | 560 +++++++++++++++++++++++++++++++++
 3 files changed, 562 insertions(+)
 create mode 100644 drivers/firmware/efi/estatus.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 501b6d300aa5..67d79d4e612d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21760,6 +21760,7 @@ F:	drivers/rapidio/
 RAS ERROR STATUS
 M:	Ahmed Tiba <ahmed.tiba at arm.com>
 S:	Maintained
+F:	drivers/firmware/efi/estatus.c
 F:	include/linux/estatus.h
 
 RAS INFRASTRUCTURE
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 8efbcf699e4f..03708d915bcf 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_EFI_PARAMS_FROM_FDT)	+= fdtparams.o
 obj-$(CONFIG_EFI_ESRT)			+= esrt.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
 obj-$(CONFIG_UEFI_CPER)			+= cper.o cper_cxl.o
+obj-$(CONFIG_RAS_ESTATUS_CORE)		+= estatus.o
 obj-$(CONFIG_EFI_RUNTIME_WRAPPERS)	+= runtime-wrappers.o
 subdir-$(CONFIG_EFI_STUB)		+= libstub
 obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
diff --git a/drivers/firmware/efi/estatus.c b/drivers/firmware/efi/estatus.c
new file mode 100644
index 000000000000..8dae5c73ce27
--- /dev/null
+++ b/drivers/firmware/efi/estatus.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Firmware-first RAS: Generic Error Status Core
+ *
+ * Copyright (C) 2025 ARM Ltd.
+ * Author: Ahmed Tiba <ahmed.tiba at arm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/cper.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
+#include <linux/pci.h>
+#include <linux/pfn.h>
+#include <linux/aer.h>
+#include <linux/nmi.h>
+#include <linux/sched/clock.h>
+#include <linux/uuid.h>
+#include <linux/kconfig.h>
+#include <linux/ras.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/workqueue.h>
+#include <linux/task_work.h>
+#include <ras/ras_event.h>
+
+#include <linux/estatus.h>
+#include <asm/fixmap.h>
+
+void estatus_pool_region_free(unsigned long addr, u32 size);
+
+/*
+ * estatus_log_hw_error - print the "Hardware error from <name>" banner
+ * @level:   printk loglevel digit selecting the pr_*() helper
+ * @seq_tag: text printed verbatim in front of the message
+ * @name:    name of the reporting error source
+ *
+ * '0'..'6' map to pr_emerg(), pr_alert(), pr_crit(), pr_err(), pr_warn(),
+ * pr_notice() and pr_info() respectively; any other value falls back to
+ * pr_debug().
+ */
+static void estatus_log_hw_error(char level, const char *seq_tag,
+				 const char *name)
+{
+	if (level == '0')
+		pr_emerg("%sHardware error from %s\n", seq_tag, name);
+	else if (level == '1')
+		pr_alert("%sHardware error from %s\n", seq_tag, name);
+	else if (level == '2')
+		pr_crit("%sHardware error from %s\n", seq_tag, name);
+	else if (level == '3')
+		pr_err("%sHardware error from %s\n", seq_tag, name);
+	else if (level == '4')
+		pr_warn("%sHardware error from %s\n", seq_tag, name);
+	else if (level == '5')
+		pr_notice("%sHardware error from %s\n", seq_tag, name);
+	else if (level == '6')
+		pr_info("%sHardware error from %s\n", seq_tag, name);
+	else
+		pr_debug("%sHardware error from %s\n", seq_tag, name);
+}
+
+/* Block length: end of the raw data when present, else header plus data. */
+static inline u32 estatus_len(struct acpi_hest_generic_status *estatus)
+{
+	return estatus->raw_data_length ?
+	       estatus->raw_data_offset + estatus->raw_data_length :
+	       sizeof(*estatus) + estatus->data_length;
+}
+
+#define ESTATUS_PFX	"ESTATUS: "	/* log message prefix */
+/* Pool bytes reserved per error source. NOTE(review): "_SIZE_SIZE" looks like a typo. */
+#define ESTATUS_ESOURCE_PREALLOC_MAX_SIZE_SIZE	65536
+/* min_alloc_order passed to gen_pool_create() in estatus_pool_init() */
+#define ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* Estimated size of one cache entry, only used to size the memory pool */
+#define ESTATUS_CACHE_AVG_SIZE	512
+/* Number of slots in estatus_caches[] */
+#define ESTATUS_CACHES_SIZE	4
+/* Cached entries expire after this many ns of sched_clock() time (10 s) */
+#define ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
+/* Cap on allocated cache entries: retired ones linger until RCU frees them */
+#define ESTATUS_CACHE_ALLOCED_MAX	(ESTATUS_CACHES_SIZE * 3 / 2)
+/* A cache entry is a struct estatus_cache header followed by the status block */
+#define ESTATUS_CACHE_LEN(estatus_len)			\
+	(sizeof(struct estatus_cache) + (estatus_len))
+#define ESTATUS_FROM_CACHE(cache)			\
+	((struct acpi_hest_generic_status *)		\
+	 ((struct estatus_cache *)(cache) + 1))
+/* Same layout for struct estatus_node; no users within this file */
+#define ESTATUS_NODE_LEN(estatus_len)			\
+	(sizeof(struct estatus_node) + (estatus_len))
+#define ESTATUS_FROM_NODE(node)				\
+	((struct acpi_hest_generic_status *)		\
+	 ((struct estatus_node *)(node) + 1))
+/* Vendor record entry header followed by its generic data; no users within this file */
+#define ESTATUS_VENDOR_ENTRY_LEN(gdata_len)		\
+	(sizeof(struct estatus_vendor_record_entry) + (gdata_len))
+#define ESTATUS_GDATA_FROM_VENDOR_ENTRY(vendor_entry)	\
+	((struct acpi_hest_generic_data *)		\
+	((struct estatus_vendor_record_entry *)(vendor_entry) + 1))
+
+static ATOMIC_NOTIFIER_HEAD(estatus_report_chain);	/* not used in this file */
+/* NOTE(review): not used in this file; presumably a work item carrying a vendor record */
+struct estatus_vendor_record_entry {
+	struct work_struct work;
+	int error_severity;
+	char vendor_record[];
+};
+/* RCU-protected cache slots and the count of entries currently allocated */
+static struct estatus_cache __rcu *estatus_caches[ESTATUS_CACHES_SIZE];
+static atomic_t estatus_cache_alloced;
+
+static int estatus_panic_timeout __read_mostly = 30;	/* not used in this file */
+/* Pool backing the cache entries; the mutex serialises estatus_pool_init() */
+static struct gen_pool *estatus_pool;
+static DEFINE_MUTEX(estatus_pool_mutex);
+
+/* Provider-reported source name, or "unknown" without a get_name() op. */
+static inline const char *estatus_source_name(struct estatus_source *source)
+{
+	if (!source->ops || !source->ops->get_name)
+		return "unknown";
+	return source->ops->get_name(source);
+}
+
+/* Maximum status block length of @source; 0 without a get_max_len() op. */
+static inline size_t estatus_source_max_len(struct estatus_source *source)
+{
+	if (!source->ops || !source->ops->get_max_len)
+		return 0;
+	return source->ops->get_max_len(source);
+}
+
+/* Notification mode of @source; ESTATUS_NOTIFY_ASYNC when not provided. */
+static inline enum estatus_notify_mode
+estatus_source_notify_mode(struct estatus_source *source)
+{
+	if (!source->ops || !source->ops->get_notify_mode)
+		return ESTATUS_NOTIFY_ASYNC;
+	return source->ops->get_notify_mode(source);
+}
+
+/* Ask the provider for the status block address; -EOPNOTSUPP if it can't. */
+static inline int estatus_source_get_phys(struct estatus_source *source,
+					  phys_addr_t *addr)
+{
+	if (source->ops && source->ops->get_phys)
+		return source->ops->get_phys(source, addr);
+	return -EOPNOTSUPP;
+}
+
+/* Read @len bytes at @addr via the provider; -EOPNOTSUPP without a read op. */
+static inline int estatus_source_read(struct estatus_source *source,
+				      phys_addr_t addr, void *buf, size_t len,
+				      enum fixed_addresses fixmap_idx)
+{
+	if (source->ops && source->ops->read)
+		return source->ops->read(source, addr, buf, len, fixmap_idx);
+	return -EOPNOTSUPP;
+}
+
+/* Write @len bytes to @addr via the provider; -EOPNOTSUPP without write op. */
+static inline int estatus_source_write(struct estatus_source *source,
+				       phys_addr_t addr, const void *buf,
+				       size_t len,
+				       enum fixed_addresses fixmap_idx)
+{
+	if (source->ops && source->ops->write)
+		return source->ops->write(source, addr, buf, len, fixmap_idx);
+	return -EOPNOTSUPP;
+}
+
+static inline void estatus_source_ack(struct estatus_source *source)
+{
+	if (source->ops && source->ops->ack)	/* ack() is optional */
+		source->ops->ack(source);
+}
+
+/* Create the estatus pool once; a zero @num_ghes is sized as one source. */
+int estatus_pool_init(unsigned int num_ghes)
+{
+	unsigned long vaddr, pool_len;
+	int rc = 0;
+
+	mutex_lock(&estatus_pool_mutex);
+	if (estatus_pool)
+		goto unlock;
+
+	estatus_pool = gen_pool_create(ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+	if (!estatus_pool) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Room for the cache entries plus a preallocation per error source. */
+	pool_len = ESTATUS_CACHE_AVG_SIZE * ESTATUS_CACHE_ALLOCED_MAX;
+	pool_len += (num_ghes ? num_ghes : 1) *
+		    ESTATUS_ESOURCE_PREALLOC_MAX_SIZE_SIZE;
+	pool_len = PAGE_ALIGN(pool_len);
+
+	vaddr = (unsigned long)vmalloc(pool_len);
+	if (!vaddr) {
+		rc = -ENOMEM;
+		goto destroy_pool;
+	}
+
+	rc = gen_pool_add(estatus_pool, vaddr, pool_len, -1);
+	if (!rc)
+		goto unlock;
+
+	/* Adding the region failed: undo the allocation, then the pool. */
+	vfree((void *)vaddr);
+destroy_pool:
+	gen_pool_destroy(estatus_pool);
+	estatus_pool = NULL;
+
+unlock:
+	mutex_unlock(&estatus_pool_mutex);
+	return rc;
+}
+
+/**
+ * estatus_pool_region_free - return memory to the estatus pool
+ * @addr: start address of the region, as obtained from estatus_pool
+ * @size: size in bytes that was allocated for the region
+ *
+ * The pool must already exist: estatus_pool is passed to gen_pool_free()
+ * without a NULL check.
+ */
+void estatus_pool_region_free(unsigned long addr, u32 size)
+{
+	gen_pool_free(estatus_pool, addr, size);
+}
+EXPORT_SYMBOL_GPL(estatus_pool_region_free);
+
+/* Check the block length (a max_len of 0 means no limit) and CPER header. */
+static int __estatus_check_estatus(struct estatus_source *source,
+				   struct acpi_hest_generic_status *estatus)
+{
+	size_t limit = estatus_source_max_len(source);
+	u32 blk_len = estatus_len(estatus);
+
+	if (sizeof(*estatus) > blk_len) {
+		pr_warn_ratelimited(FW_WARN ESTATUS_PFX "Truncated error status block!\n");
+		return -EIO;
+	}
+
+	if (limit && limit < blk_len) {
+		pr_warn_ratelimited(FW_WARN ESTATUS_PFX "Invalid error status block length!\n");
+		return -EIO;
+	}
+
+	if (cper_estatus_check_header(estatus)) {
+		pr_warn_ratelimited(FW_WARN ESTATUS_PFX "Invalid CPER header!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Look up the block address and read just the block header into @estatus. */
+static int __estatus_peek_estatus(struct estatus_source *source,
+				  struct acpi_hest_generic_status *estatus,
+				  phys_addr_t *buf_paddr,
+				  enum fixed_addresses fixmap_idx)
+{
+	int err;
+
+	err = estatus_source_get_phys(source, buf_paddr);
+	if (err) {
+		*buf_paddr = 0;
+		pr_warn_ratelimited(FW_WARN ESTATUS_PFX
+				    "Failed to get error status block address for provider %s: %d\n",
+				    estatus_source_name(source), err);
+		return err;
+	}
+
+	if (!*buf_paddr)
+		return -ENOENT;
+
+	err = estatus_source_read(source, *buf_paddr, estatus,
+				  sizeof(*estatus), fixmap_idx);
+	if (err)
+		return err;
+
+	if (estatus->block_status)
+		return 0;
+
+	/* No status pending: forget the address and report no entry. */
+	*buf_paddr = 0;
+	return -ENOENT;
+}
+
+/* Read @buf_len bytes of the status block into @estatus and check them. */
+static int __estatus_read_estatus(struct estatus_source *source,
+				  struct acpi_hest_generic_status *estatus,
+				  phys_addr_t buf_paddr,
+				  enum fixed_addresses fixmap_idx,
+				  size_t buf_len)
+{
+	int err = estatus_source_read(source, buf_paddr, estatus, buf_len,
+				      fixmap_idx);
+
+	if (err)
+		return err;
+
+	if (!cper_estatus_check(estatus))
+		return 0;
+
+	/* The block was read but did not pass cper_estatus_check(). */
+	pr_warn_ratelimited(FW_WARN ESTATUS_PFX
+			    "Failed to read error status block for provider %s!\n",
+			    estatus_source_name(source));
+	return -EIO;
+}
+
+/* Peek, validate and then fully read the status block of @source. */
+static int estatus_read_estatus(struct estatus_source *source,
+				struct acpi_hest_generic_status *estatus,
+				phys_addr_t *buf_paddr,
+				enum fixed_addresses fixmap_idx)
+{
+	int rc;
+
+	/* Each step only runs if the previous one succeeded. */
+	rc = __estatus_peek_estatus(source, estatus, buf_paddr, fixmap_idx);
+	if (!rc)
+		rc = __estatus_check_estatus(source, estatus);
+	if (!rc)
+		rc = __estatus_read_estatus(source, estatus, *buf_paddr,
+					    fixmap_idx, estatus_len(estatus));
+
+	return rc;
+}
+
+/* Clear block_status locally and in the source block, then ack the source. */
+static void estatus_clear_estatus(struct estatus_source *source,
+				  struct acpi_hest_generic_status *estatus,
+				  phys_addr_t buf_paddr,
+				  enum fixed_addresses fixmap_idx)
+{
+	int err;
+
+	estatus->block_status = 0;
+	if (!buf_paddr)
+		return;
+
+	err = estatus_source_write(source, buf_paddr, estatus,
+				   sizeof(estatus->block_status), fixmap_idx);
+	if (err)
+		pr_warn_ratelimited(FW_WARN ESTATUS_PFX
+				    "Failed to clear error status block for provider %s: %d\n",
+				    estatus_source_name(source), err);
+
+	estatus_source_ack(source);
+}
+
+/* Map a CPER severity to ESTATUS_SEV_*; unknown values are treated as panic. */
+static inline int estatus_severity(int severity)
+{
+	switch (severity) {
+	case CPER_SEV_INFORMATIONAL:
+		return ESTATUS_SEV_NO;
+	case CPER_SEV_CORRECTED:
+		return ESTATUS_SEV_CORRECTED;
+	case CPER_SEV_RECOVERABLE:
+		return ESTATUS_SEV_RECOVERABLE;
+	case CPER_SEV_FATAL:
+	default:
+		/* Fatal, or a severity we do not know: go panic */
+		return ESTATUS_SEV_PANIC;
+	}
+}
+
+/* Print one status block, tagged with a sequence number, at @pfx's level. */
+static void __estatus_print_estatus(const char *pfx,
+				    struct estatus_source *source,
+				    const struct acpi_hest_generic_status *estatus)
+{
+	static atomic_t seqno;
+	const char *name = estatus_source_name(source);
+	/* Anything above "corrected" is logged as an error by default. */
+	bool uncorrected = estatus_severity(estatus->error_severity) >
+			   ESTATUS_SEV_CORRECTED;
+	const char *level = pfx;
+	char pfx_seq[64], seq_tag[64];
+	unsigned int curr_seqno;
+	char level_char;
+
+	if (!level)
+		level = uncorrected ? KERN_ERR : KERN_WARNING;
+
+	/* Take the loglevel digit from the prefix when it carries one. */
+	if (level[0] == KERN_SOH_ASCII && level[1])
+		level_char = level[1];
+	else
+		level_char = uncorrected ? '3' : '4';
+
+	curr_seqno = atomic_inc_return(&seqno);
+	/* seq_tag is "{N}" HW_ERR; pfx_seq puts the printk level in front. */
+	snprintf(seq_tag, sizeof(seq_tag), "{%u}" HW_ERR, curr_seqno);
+	snprintf(pfx_seq, sizeof(pfx_seq), "%s%s", level, seq_tag);
+	estatus_log_hw_error(level_char, seq_tag, name);
+	cper_estatus_print(pfx_seq, estatus);
+}
+
+/* Rate-limited printing; returns 1 if the block was printed, else 0. */
+static int estatus_print_estatus(const char *pfx,
+				 struct estatus_source *source,
+				 const struct acpi_hest_generic_status *estatus)
+{
+	/* Not more than 2 messages every 5 seconds */
+	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5 * HZ, 2);
+	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5 * HZ, 2);
+	struct ratelimit_state *rs = &ratelimit_uncorrected;
+
+	if (estatus_severity(estatus->error_severity) <= ESTATUS_SEV_CORRECTED)
+		rs = &ratelimit_corrected;
+
+	if (!__ratelimit(rs))
+		return 0;
+
+	__estatus_print_estatus(pfx, source, estatus);
+	return 1;
+}
+
+/*
+ * Error status reporting throttle: returns 1 when an identical block was
+ * cached less than ESTATUS_IN_CACHE_MAX_NSEC ago, so that repeats of one
+ * error do not crowd out reports of other kinds of errors.
+ */
+static int estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+	u32 len = estatus_len(estatus);
+	struct estatus_cache *cache;
+	unsigned long long now;
+	int i, cached = 0;
+
+	rcu_read_lock();
+	for (i = 0; i < ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(estatus_caches[i]);
+		if (!cache || len != cache->estatus_len)
+			continue;
+		if (memcmp(estatus, ESTATUS_FROM_CACHE(cache), len))
+			continue;
+
+		/* Identical block found: count the hit and check its age. */
+		atomic_inc(&cache->count);
+		now = sched_clock();
+		if (now - cache->time_in < ESTATUS_IN_CACHE_MAX_NSEC)
+			cached = 1;
+		/* Only the first identical entry is considered. */
+		break;
+	}
+	rcu_read_unlock();
+
+	return cached;
+}
+
+/* Allocate a cache entry from the pool and fill it with a copy of @estatus. */
+static struct estatus_cache *estatus_cache_alloc(struct estatus_source *source,
+						 struct acpi_hest_generic_status *estatus)
+{
+	struct estatus_cache *cache;
+	u32 len, cache_len;
+
+	/* Reserve a unit of the allocation budget before touching the pool. */
+	if (atomic_inc_return(&estatus_cache_alloced) > ESTATUS_CACHE_ALLOCED_MAX)
+		goto err_unreserve;
+
+	len = estatus_len(estatus);
+	cache_len = ESTATUS_CACHE_LEN(len);
+	cache = (void *)gen_pool_alloc(estatus_pool, cache_len);
+	if (!cache)
+		goto err_unreserve;
+
+	memcpy(ESTATUS_FROM_CACHE(cache), estatus, len);
+	cache->estatus_len = len;
+	atomic_set(&cache->count, 0);
+	cache->source = source;
+	cache->time_in = sched_clock();
+	return cache;
+
+err_unreserve:
+	atomic_dec(&estatus_cache_alloced);
+	return NULL;
+}
+
+/* RCU callback: give a retired cache entry back to the pool. */
+static void estatus_cache_rcu_free(struct rcu_head *head)
+{
+	struct estatus_cache *cache;
+	u32 len;
+
+	cache = container_of(head, struct estatus_cache, rcu);
+	len = ESTATUS_CACHE_LEN(estatus_len(ESTATUS_FROM_CACHE(cache)));
+	gen_pool_free(estatus_pool, (unsigned long)cache, len);
+	atomic_dec(&estatus_cache_alloced);
+}
+
+/* Cache @estatus, replacing a free, expired or least frequently hit entry. */
+static void estatus_cache_add(struct estatus_source *source,
+			      struct acpi_hest_generic_status *estatus)
+{
+	unsigned long long now, duration, period, max_period = 0;
+	struct estatus_cache *cache, *new_cache;
+	struct estatus_cache __rcu *victim;
+	int i, slot = -1, count;
+
+	new_cache = estatus_cache_alloc(source, estatus);
+	if (!new_cache)
+		return;
+
+	rcu_read_lock();
+	now = sched_clock();
+	for (i = 0; i < ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(estatus_caches[i]);
+		if (!cache) {
+			slot = i;
+			break;
+		}
+		duration = now - cache->time_in;
+		if (duration >= ESTATUS_IN_CACHE_MAX_NSEC) {
+			slot = i;
+			break;
+		}
+		count = atomic_read(&cache->count);
+		period = duration;
+		do_div(period, (count + 1));
+		if (period > max_period) {
+			max_period = period;
+			slot = i;
+		}
+	}
+	rcu_read_unlock();
+
+	if (slot == -1) {
+		/* Nothing to evict: new_cache was never published, free it. */
+		estatus_cache_rcu_free(&new_cache->rcu);
+		return;
+	}
+
+	/*
+	 * Release semantics: a CPU in estatus_cached() that sees the new
+	 * pointer also sees the initialised cache fields.
+	 */
+	victim = xchg_release(&estatus_caches[slot],
+			      RCU_INITIALIZER(new_cache));
+
+	/*
+	 * victim may differ from the entry the slot was chosen for; drop it
+	 * anyway.  At worst this causes a harmless false cache miss later.
+	 */
+	if (victim)
+		call_rcu(&unrcu_pointer(victim)->rcu, estatus_cache_rcu_free);
+}
-- 
2.43.0




More information about the linux-arm-kernel mailing list