[PATCH AUTOSEL 6.16 28/85] EDAC/synopsys: Clear the ECC counters on init
Sasha Levin
sashal at kernel.org
Sun Aug 3 17:22:37 PDT 2025
From: Shubhrajyoti Datta <shubhrajyoti.datta at amd.com>
[ Upstream commit b1dc7f097b78eb8d25b071ead2384b07a549692b ]
Clear the ECC error and counter registers during initialization/probe to avoid
reporting stale errors that may have occurred before EDAC registration.
For that, unify the Zynq and ZynqMP ECC state reading paths and simplify the
code.
[ bp: Massage commit message.
Fix an -Wsometimes-uninitialized warning as reported by
Reported-by: kernel test robot <lkp at intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202507141048.obUv3ZUm-lkp@intel.com ]
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta at amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp at alien8.de>
Link: https://lore.kernel.org/20250713050753.7042-1-shubhrajyoti.datta@amd.com
Signed-off-by: Sasha Levin <sashal at kernel.org>
---
LLM Generated explanations, may be completely bogus:
**Backport Status: YES**
This commit is suitable for backporting to stable kernel trees based on
the following analysis:
## Bug Fix Nature
The commit fixes a clear bug where the EDAC driver could report stale
ECC errors from before the driver was initialized. This is evident from
the commit message: "Clear the ECC error and counter registers during
initialization/probe to avoid reporting stale errors that may have
occurred before EDAC registration."
## Code Changes Analysis
1. **Problem Being Fixed**:
- The driver wasn't clearing ECC error counters during initialization
- This could lead to false error reports from pre-boot or pre-driver-
load errors
- Users could see incorrect ECC error counts that don't reflect
actual runtime errors
2. **Solution Implementation**:
- The patch consolidates the ECC state checking into a single
`get_ecc_state()` function
- Adds counter clearing logic for both Zynq and ZynqMP platforms:
```c
// For Zynq:
clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_UE_ERR;
writel(clearval, priv->baseaddr + ECC_CTRL_OFST);
writel(0x0, priv->baseaddr + ECC_CTRL_OFST);
// For ZynqMP:
clearval = readl(priv->baseaddr + ECC_CLR_OFST) |
ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT |
ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
writel(clearval, priv->baseaddr + ECC_CLR_OFST);
```
3. **Scope and Risk Assessment**:
- Changes are confined to the synopsys_edac driver
- No API changes or interface modifications
- The refactoring simplifies code by removing duplicate
`zynq_get_ecc_state()` and `zynqmp_get_ecc_state()` functions
- Adds platform type enum for cleaner platform identification
- Risk is minimal as it only affects initialization path
4. **Stable Tree Criteria Compliance**:
- ✓ Fixes a real bug (stale error reporting)
- ✓ Small and contained changes (~150 lines)
- ✓ No new features added
- ✓ No architectural changes
- ✓ Low regression risk
- ✓ Clear fix for a specific problem
5. **Additional Evidence**:
- The maintainer (Borislav Petkov) accepted and massaged the patch
- Fixed a compiler warning caught by kernel test robot
- The bug could affect production systems by showing false ECC errors
This is a textbook example of a stable-worthy patch: it fixes a specific
bug (stale ECC error reporting) with minimal, focused changes that don't
introduce new functionality or risk.
drivers/edac/synopsys_edac.c | 97 +++++++++++++++++-------------------
1 file changed, 46 insertions(+), 51 deletions(-)
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 5ed32a3299c4..51143b3257de 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -332,20 +332,26 @@ struct synps_edac_priv {
#endif
};
+enum synps_platform_type {
+ ZYNQ,
+ ZYNQMP,
+ SYNPS,
+};
+
/**
* struct synps_platform_data - synps platform data structure.
+ * @platform: Identifies the target hardware platform
* @get_error_info: Get EDAC error info.
* @get_mtype: Get mtype.
* @get_dtype: Get dtype.
- * @get_ecc_state: Get ECC state.
* @get_mem_info: Get EDAC memory info
* @quirks: To differentiate IPs.
*/
struct synps_platform_data {
+ enum synps_platform_type platform;
int (*get_error_info)(struct synps_edac_priv *priv);
enum mem_type (*get_mtype)(const void __iomem *base);
enum dev_type (*get_dtype)(const void __iomem *base);
- bool (*get_ecc_state)(void __iomem *base);
#ifdef CONFIG_EDAC_DEBUG
u64 (*get_mem_info)(struct synps_edac_priv *priv);
#endif
@@ -720,51 +726,38 @@ static enum dev_type zynqmp_get_dtype(const void __iomem *base)
return dt;
}
-/**
- * zynq_get_ecc_state - Return the controller ECC enable/disable status.
- * @base: DDR memory controller base address.
- *
- * Get the ECC enable/disable status of the controller.
- *
- * Return: true if enabled, otherwise false.
- */
-static bool zynq_get_ecc_state(void __iomem *base)
+static bool get_ecc_state(struct synps_edac_priv *priv)
{
+ u32 ecctype, clearval;
enum dev_type dt;
- u32 ecctype;
-
- dt = zynq_get_dtype(base);
- if (dt == DEV_UNKNOWN)
- return false;
- ecctype = readl(base + SCRUB_OFST) & SCRUB_MODE_MASK;
- if ((ecctype == SCRUB_MODE_SECDED) && (dt == DEV_X2))
- return true;
-
- return false;
-}
-
-/**
- * zynqmp_get_ecc_state - Return the controller ECC enable/disable status.
- * @base: DDR memory controller base address.
- *
- * Get the ECC enable/disable status for the controller.
- *
- * Return: a ECC status boolean i.e true/false - enabled/disabled.
- */
-static bool zynqmp_get_ecc_state(void __iomem *base)
-{
- enum dev_type dt;
- u32 ecctype;
-
- dt = zynqmp_get_dtype(base);
- if (dt == DEV_UNKNOWN)
- return false;
-
- ecctype = readl(base + ECC_CFG0_OFST) & SCRUB_MODE_MASK;
- if ((ecctype == SCRUB_MODE_SECDED) &&
- ((dt == DEV_X2) || (dt == DEV_X4) || (dt == DEV_X8)))
- return true;
+ if (priv->p_data->platform == ZYNQ) {
+ dt = zynq_get_dtype(priv->baseaddr);
+ if (dt == DEV_UNKNOWN)
+ return false;
+
+ ecctype = readl(priv->baseaddr + SCRUB_OFST) & SCRUB_MODE_MASK;
+ if (ecctype == SCRUB_MODE_SECDED && dt == DEV_X2) {
+ clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_UE_ERR;
+ writel(clearval, priv->baseaddr + ECC_CTRL_OFST);
+ writel(0x0, priv->baseaddr + ECC_CTRL_OFST);
+ return true;
+ }
+ } else {
+ dt = zynqmp_get_dtype(priv->baseaddr);
+ if (dt == DEV_UNKNOWN)
+ return false;
+
+ ecctype = readl(priv->baseaddr + ECC_CFG0_OFST) & SCRUB_MODE_MASK;
+ if (ecctype == SCRUB_MODE_SECDED &&
+ (dt == DEV_X2 || dt == DEV_X4 || dt == DEV_X8)) {
+ clearval = readl(priv->baseaddr + ECC_CLR_OFST) |
+ ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT |
+ ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
+ writel(clearval, priv->baseaddr + ECC_CLR_OFST);
+ return true;
+ }
+ }
return false;
}
@@ -934,18 +927,18 @@ static int setup_irq(struct mem_ctl_info *mci,
}
static const struct synps_platform_data zynq_edac_def = {
+ .platform = ZYNQ,
.get_error_info = zynq_get_error_info,
.get_mtype = zynq_get_mtype,
.get_dtype = zynq_get_dtype,
- .get_ecc_state = zynq_get_ecc_state,
.quirks = 0,
};
static const struct synps_platform_data zynqmp_edac_def = {
+ .platform = ZYNQMP,
.get_error_info = zynqmp_get_error_info,
.get_mtype = zynqmp_get_mtype,
.get_dtype = zynqmp_get_dtype,
- .get_ecc_state = zynqmp_get_ecc_state,
#ifdef CONFIG_EDAC_DEBUG
.get_mem_info = zynqmp_get_mem_info,
#endif
@@ -957,10 +950,10 @@ static const struct synps_platform_data zynqmp_edac_def = {
};
static const struct synps_platform_data synopsys_edac_def = {
+ .platform = SYNPS,
.get_error_info = zynqmp_get_error_info,
.get_mtype = zynqmp_get_mtype,
.get_dtype = zynqmp_get_dtype,
- .get_ecc_state = zynqmp_get_ecc_state,
.quirks = (DDR_ECC_INTR_SUPPORT | DDR_ECC_INTR_SELF_CLEAR
#ifdef CONFIG_EDAC_DEBUG
| DDR_ECC_DATA_POISON_SUPPORT
@@ -1390,10 +1383,6 @@ static int mc_probe(struct platform_device *pdev)
if (!p_data)
return -ENODEV;
- if (!p_data->get_ecc_state(baseaddr)) {
- edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n");
- return -ENXIO;
- }
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = SYNPS_EDAC_NR_CSROWS;
@@ -1413,6 +1402,12 @@ static int mc_probe(struct platform_device *pdev)
priv = mci->pvt_info;
priv->baseaddr = baseaddr;
priv->p_data = p_data;
+ if (!get_ecc_state(priv)) {
+ edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n");
+ rc = -ENODEV;
+ goto free_edac_mc;
+ }
+
spin_lock_init(&priv->reglock);
mc_init(mci, pdev);
--
2.39.5
More information about the linux-arm-kernel
mailing list