[PATCH 1/3] mtd: nand: Add on-die ECC support

Richard Weinberger richard at nod.at
Wed Mar 25 07:02:29 PDT 2015


Some Micron NAND chips offer an on-die ECC (AKA internal ECC)
feature. It is useful when the host platform does not offer
multi-bit ECC and software ECC is not feasible.

Based on original work by David Mosberger <davidm at egauge.net>

Signed-off-by: Richard Weinberger <richard at nod.at>
---
 drivers/mtd/nand/nand_base.c   |  66 +++++++++-
 drivers/mtd/nand/nand_ondie.c  | 266 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h       |   6 +
 include/linux/mtd/nand_ondie.h |  40 +++++++
 4 files changed, 374 insertions(+), 4 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_ondie.c
 create mode 100644 include/linux/mtd/nand_ondie.h

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index df7eb4f..92e7ed7 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -43,6 +43,7 @@
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/nand_bch.h>
+#include <linux/mtd/nand_ondie.h>
 #include <linux/interrupt.h>
 #include <linux/bitops.h>
 #include <linux/leds.h>
@@ -79,6 +80,20 @@ static struct nand_ecclayout nand_oob_64 = {
 		 .length = 38} }
 };
 
+static struct nand_ecclayout nand_oob_64_on_die = {
+	.eccbytes = 32,	/* 4 groups of 8 ECC byte positions, listed below */
+	.eccpos = {	/* bytes 8..15 of each 16-byte OOB group */
+		    8,  9, 10, 11, 12, 13, 14, 15,
+		   24, 25, 26, 27, 28, 29, 30, 31,
+		   40, 41, 42, 43, 44, 45, 46, 47,
+		   56, 57, 58, 59, 60, 61, 62, 63},
+	.oobfree = {	/* bytes 4..7 of each 16-byte OOB group */
+		{.offset =  4,	 .length = 4},
+		{.offset = 20,	 .length = 4},
+		{.offset = 36,	 .length = 4},
+		{.offset = 52,	 .length = 4} }
+};
+
 static struct nand_ecclayout nand_oob_128 = {
 	.eccbytes = 48,
 	.eccpos = {
@@ -3115,9 +3130,10 @@ static int nand_setup_read_retry_micron(struct mtd_info *mtd, int retry_mode)
 /*
  * Configure chip properties from Micron vendor-specific ONFI table
  */
-static void nand_onfi_detect_micron(struct nand_chip *chip,
-		struct nand_onfi_params *p)
+static void nand_onfi_detect_micron(struct mtd_info *mtd,
+				    struct nand_onfi_params *p)
 {
+	struct nand_chip *chip = mtd->priv;
 	struct nand_onfi_vendor_micron *micron = (void *)p->vendor;
 
 	if (le16_to_cpu(p->vendor_revision) < 1)
@@ -3125,6 +3141,8 @@ static void nand_onfi_detect_micron(struct nand_chip *chip,
 
 	chip->read_retries = micron->read_retry_options;
 	chip->setup_read_retry = nand_setup_read_retry_micron;
+
+	nand_onfi_detect_on_die_ecc(mtd);
 }
 
 /*
@@ -3226,7 +3244,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 	}
 
 	if (p->jedec_id == NAND_MFR_MICRON)
-		nand_onfi_detect_micron(chip, p);
+		nand_onfi_detect_micron(mtd, p);
 
 	return 1;
 }
@@ -4056,6 +4074,40 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 		break;
 
+	case NAND_ECC_HW_ON_DIE:
+		if (!mtd_nand_has_on_die()) {
+			pr_warn("CONFIG_MTD_NAND_ECC_ON_DIE not enabled\n");
+			BUG();
+		}
+		/*
+		 * nand_bbt.c by default puts the BBT marker at
+		 * offset 8 in OOB, which is used for ECC (see
+		 * nand_oob_64_on_die).
+		 * Fixed by not using OOB for BBT marker.
+		 */
+		chip->bbt_options |= NAND_BBT_NO_OOB;
+		ecc->layout = &nand_oob_64_on_die;
+		ecc->read_page = nand_read_page_on_die;
+		ecc->read_subpage = nand_read_subpage_on_die;
+		ecc->write_page = nand_write_page_raw;
+		ecc->read_oob = nand_read_oob_std;
+		ecc->read_page_raw = nand_read_page_raw;
+		ecc->write_page_raw = nand_write_page_raw;
+		ecc->write_oob = nand_write_oob_std;
+		ecc->size = 512;
+		ecc->bytes = 8;
+		ecc->strength = 4;
+		ecc->priv = nand_on_die_init(mtd);
+		if (!ecc->priv) {
+			pr_warn("On-Die ECC initialization failed!\n");
+			BUG();
+		}
+		if (nand_setup_on_die_ecc_micron(mtd, 1) != 0) {
+			pr_warn("Unable to enable on-die ECC!\n");
+			BUG();
+		}
+		break;
+
 	case NAND_ECC_NONE:
 		pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n");
 		ecc->read_page = nand_read_page_raw;
@@ -4127,10 +4179,14 @@ int nand_scan_tail(struct mtd_info *mtd)
 	/* Invalidate the pagebuffer reference */
 	chip->pagebuf = -1;
 
-	/* Large page NAND with SOFT_ECC should support subpage reads */
+	/*
+	 * Large page NAND with SOFT_ECC or on-die ECC should support
+	 * subpage reads.
+	 */
 	switch (ecc->mode) {
 	case NAND_ECC_SOFT:
 	case NAND_ECC_SOFT_BCH:
+	case NAND_ECC_HW_ON_DIE:
 		if (chip->page_shift > 9)
 			chip->options |= NAND_SUBPAGE_READ;
 		break;
@@ -4232,6 +4288,8 @@ void nand_release(struct mtd_info *mtd)
 
 	if (chip->ecc.mode == NAND_ECC_SOFT_BCH)
 		nand_bch_free((struct nand_bch_control *)chip->ecc.priv);
+	else if (chip->ecc.mode == NAND_ECC_HW_ON_DIE)
+		nand_on_die_destroy(chip->ecc.priv);
 
 	mtd_device_unregister(mtd);
 
diff --git a/drivers/mtd/nand/nand_ondie.c b/drivers/mtd/nand/nand_ondie.c
new file mode 100644
index 0000000..4147ef5
--- /dev/null
+++ b/drivers/mtd/nand/nand_ondie.c
@@ -0,0 +1,266 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/nand_ondie.h>
+
+/**
+ * struct nand_on_die_control - private NAND on-die ECC control structure
+ * @chkbuf: holds a whole page plus OOB as read with on-die ECC enabled
+ * @rawbuf: holds a whole page plus OOB as read with on-die ECC disabled
+ *
+ * As on-die ECC does not report the number of changed bits we have to
+ * count them on our own. If on-die ECC reports flipped bits the page is
+ * read into @chkbuf and then again with ECC disabled into @rawbuf.
+ * By comparing these buffers the number of changed bits can be identified.
+ * Both buffers are allocated with mtd->writesize + mtd->oobsize bytes.
+ */
+struct nand_on_die_control {
+	uint8_t *chkbuf;
+	uint8_t *rawbuf;
+};
+
+/* Returns the max. bitflips of any ECC step, or -errno if ECC switching fails */
+static int check_for_bitflips(struct mtd_info *mtd, int page)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct nand_on_die_control *on_die_ctrl = chip->ecc.priv;
+	uint8_t *chkbuf = on_die_ctrl->chkbuf, *rawbuf = on_die_ctrl->rawbuf;
+	int read_size = mtd->writesize + mtd->oobsize;
+	int flips, max_bitflips = 0, i, j, ret;
+	uint8_t *chkoob, *rawoob;
+	uint32_t *eccpos;
+
+	/* Read entire page w/OOB area with on-die ECC on */
+	chip->read_buf(mtd, chkbuf, read_size);
+
+	/* Re-read page with on-die ECC off; a failed mode switch aborts the check */
+	ret = nand_setup_on_die_ecc_micron(mtd, 0);
+	if (ret)
+		return ret < 0 ? ret : -EIO;
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	chip->read_buf(mtd, rawbuf, read_size);
+	ret = nand_setup_on_die_ecc_micron(mtd, 1);
+	if (ret)
+		return ret < 0 ? ret : -EIO;
+
+	chkoob = chkbuf + mtd->writesize;
+	rawoob = rawbuf + mtd->writesize;
+	eccpos = chip->ecc.layout->eccpos;
+	for (i = 0; i < chip->ecc.steps; ++i) {
+		/* Count bit flips in the actual data area */
+		flips = 0;
+		for (j = 0; j < chip->ecc.size; ++j)
+			flips += hweight8(chkbuf[j] ^ rawbuf[j]);
+		/* Count bit flips in the ECC bytes */
+		for (j = 0; j < chip->ecc.bytes; ++j) {
+			flips += hweight8(chkoob[*eccpos] ^ rawoob[*eccpos]);
+			++eccpos;
+		}
+		mtd->ecc_stats.corrected += flips;
+		max_bitflips = max_t(int, max_bitflips, flips);
+		chkbuf += chip->ecc.size;
+		rawbuf += chip->ecc.size;
+	}
+	/* Re-issue the READ command for the actual data read that follows.  */
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	return max_bitflips;
+}
+
+/* Returns max. bitflips of @page; an ECC failure only bumps ecc_stats.failed */
+static int check_read_status_on_die(struct mtd_info *mtd, int page)
+{
+	struct nand_chip *chip = mtd->priv;
+	int max_bitflips = 0;
+	uint8_t status;
+
+	/* Check ECC status of page just transferred into NAND's page buffer */
+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+	status = chip->read_byte(mtd);
+
+	/* Switch back to data reading */
+	chip->cmd_ctrl(mtd, NAND_CMD_READ0, NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
+	chip->cmd_ctrl(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+
+	if (status & NAND_STATUS_FAIL) {
+		/* Page has invalid ECC */
+		mtd->ecc_stats.failed++;
+	} else if (status & NAND_STATUS_REWRITE) {
+		/*
+		 * Micron on-die ECC doesn't report the number of bitflips, so
+		 * we have to count them ourselves to see if the error rate is
+		 * too high. Particularly important for pages with stuck bits.
+		 */
+		max_bitflips = check_for_bitflips(mtd, page);
+	}
+	return max_bitflips;
+}
+
+
+/**
+ * nand_read_page_on_die - [INTERN] read a whole page using on-die ECC
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
+ * @page: page number to read
+ */
+int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
+				 uint8_t *buf, int oob_required, int page)
+{
+	uint32_t failed = mtd->ecc_stats.failed; /* snapshot to detect a new failure */
+	int ret;
+
+	ret = check_read_status_on_die(mtd, page);
+	if (ret < 0 || mtd->ecc_stats.failed != failed)
+		return ret; /* error or uncorrectable page: @buf is not filled */
+
+	chip->read_buf(mtd, buf, mtd->writesize);
+	if (oob_required)
+		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	return ret; /* value reported by check_read_status_on_die() */
+}
+
+/**
+ * nand_read_subpage_on_die - [INTERN] on-die-ECC based sub-page read function
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @data_offs: offset of requested data within the page
+ * @readlen: data length
+ * @bufpoi: buffer to store read data
+ * @page: page number to read
+ */
+int nand_read_subpage_on_die(struct mtd_info *mtd,
+				    struct nand_chip *chip,
+				    uint32_t data_offs, uint32_t readlen,
+				    uint8_t *bufpoi, int page)
+{
+	int start_step, end_step, num_steps, ret;
+	int data_col_addr;
+	int datafrag_len;
+	uint32_t failed;
+	uint8_t *p;
+
+	/* First and last ECC step touched by the requested range */
+	start_step = data_offs / chip->ecc.size;
+	end_step = (data_offs + readlen - 1) / chip->ecc.size;
+	num_steps = end_step - start_step + 1;
+
+	/* Length and start column of the read, both multiples of ecc.size */
+	datafrag_len = num_steps * chip->ecc.size;
+	data_col_addr = start_step * chip->ecc.size;
+	p = bufpoi + data_col_addr; /* data lands at its in-page offset in @bufpoi */
+
+	failed = mtd->ecc_stats.failed; /* snapshot to detect a new failure */
+
+	ret = check_read_status_on_die(mtd, page);
+	if (ret < 0 || mtd->ecc_stats.failed != failed) {
+		memset(p, 0, datafrag_len); /* zero the requested steps on failure */
+		return ret;
+	}
+
+	/* Move the column pointer if the read does not start at column 0 */
+	if (data_col_addr != 0)
+		chip->cmdfunc(mtd, NAND_CMD_RNDOUT, data_col_addr, -1);
+
+	chip->read_buf(mtd, p, datafrag_len);
+
+	return ret;
+}
+
+/**
+ * nand_on_die_init - allocate the on-die ECC control data for a chip
+ * @mtd: mtd info structure
+ *
+ * Returns the control structure with two zeroed buffers of
+ * mtd->writesize + mtd->oobsize bytes each, or NULL on allocation failure.
+ */
+struct nand_on_die_control *nand_on_die_init(struct mtd_info *mtd)
+{
+	size_t page_len = mtd->writesize + mtd->oobsize;
+	struct nand_on_die_control *od;
+
+	od = kzalloc(sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return NULL;
+
+	od->chkbuf = kzalloc(page_len, GFP_KERNEL);
+	od->rawbuf = kzalloc(page_len, GFP_KERNEL);
+	if (od->chkbuf && od->rawbuf)
+		return od;
+
+	/* kfree(NULL) is a no-op, so a partial allocation is safe to undo */
+	kfree(od->chkbuf);
+	kfree(od->rawbuf);
+	kfree(od);
+	return NULL;
+}
+
+/**
+ * nand_on_die_destroy - free the page buffers and the control structure
+ * @ctrl: on-die ECC private data as returned by nand_on_die_init();
+ *        dereferenced unconditionally, so it must not be NULL
+ */
+void nand_on_die_destroy(struct nand_on_die_control *ctrl)
+{
+	kfree(ctrl->rawbuf);
+	kfree(ctrl->chkbuf);
+	kfree(ctrl);
+}
+
+/* Write the on-die ECC bit of the array operation mode feature to the chip. */
+static int __nand_setup_on_die_ecc_micron(struct mtd_info *mtd, int enable)
+{
+	struct nand_chip *nand = mtd->priv;
+	uint8_t param[ONFI_SUBFEATURE_PARAM_LEN] = {
+		enable ? ONFI_FEATURE_ENABLE_ON_DIE_ECC : 0 };
+
+	return nand->onfi_set_features(mtd, nand,
+				       ONFI_FEATURE_ADDR_ARRAY_OP_MODE, param);
+}
+
+/**
+ * nand_setup_on_die_ecc_micron - switch on-die ECC mode on or off
+ * @mtd: mtd info structure
+ * @enable: non-zero enables on-die ECC, zero disables it
+ *
+ * Does nothing and returns 0 unless ecc.mode is NAND_ECC_HW_ON_DIE.
+ */
+int nand_setup_on_die_ecc_micron(struct mtd_info *mtd, int enable)
+{
+	struct nand_chip *nand = mtd->priv;
+	int err = 0;
+
+	if (nand->ecc.mode == NAND_ECC_HW_ON_DIE)
+		err = __nand_setup_on_die_ecc_micron(mtd, enable);
+	if (err)
+		pr_err("Unable to %sable on-die ECC!\n",
+		       enable ? "en" : "dis");
+	return err;
+}
+
+/**
+ * nand_onfi_detect_on_die_ecc - Detect and handle on-die ECC
+ * @mtd: mtd info structure
+ *
+ * Disables on-die ECC if it is on while another ECC mode was requested.
+ */
+void nand_onfi_detect_on_die_ecc(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd->priv;
+	u8 param[ONFI_SUBFEATURE_PARAM_LEN];
+	int err;
+
+	err = nand->onfi_get_features(mtd, nand,
+				      ONFI_FEATURE_ADDR_ARRAY_OP_MODE, param);
+	if (err < 0 || nand->ecc.mode == NAND_ECC_HW_ON_DIE)
+		return;
+	if (!(param[0] & ONFI_FEATURE_ENABLE_ON_DIE_ECC))
+		return;
+	pr_info("Requested ECC method is not on-die, disabling on-die ECC\n");
+	__nand_setup_on_die_ecc_micron(mtd, 0);
+}
+
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 3d4ea7e..5216704 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -101,6 +101,7 @@ extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 /* Status bits */
 #define NAND_STATUS_FAIL	0x01
 #define NAND_STATUS_FAIL_N1	0x02
+#define NAND_STATUS_REWRITE	0x08
 #define NAND_STATUS_TRUE_READY	0x20
 #define NAND_STATUS_READY	0x40
 #define NAND_STATUS_WP		0x80
@@ -115,6 +116,7 @@ typedef enum {
 	NAND_ECC_HW_SYNDROME,
 	NAND_ECC_HW_OOB_FIRST,
 	NAND_ECC_SOFT_BCH,
+	NAND_ECC_HW_ON_DIE,
 } nand_ecc_modes_t;
 
 /*
@@ -219,6 +221,10 @@ struct nand_chip;
 /* Vendor-specific feature address (Micron) */
 #define ONFI_FEATURE_ADDR_READ_RETRY	0x89
 
+/* Vendor-specific array operation mode (Micron) */
+#define ONFI_FEATURE_ADDR_ARRAY_OP_MODE	0x90
+#define ONFI_FEATURE_ENABLE_ON_DIE_ECC	0x08
+
 /* ONFI subfeature parameters length */
 #define ONFI_SUBFEATURE_PARAM_LEN	4
 
diff --git a/include/linux/mtd/nand_ondie.h b/include/linux/mtd/nand_ondie.h
new file mode 100644
index 0000000..b2ec2da
--- /dev/null
+++ b/include/linux/mtd/nand_ondie.h
@@ -0,0 +1,40 @@
+#ifndef __MTD_NAND_ONDIE_H__
+#define __MTD_NAND_ONDIE_H__
+
+/* Only pointers to these are used here, so forward declarations suffice. */
+struct mtd_info;
+struct nand_chip;
+struct nand_on_die_control;
+
+#if defined(CONFIG_MTD_NAND_ECC_ON_DIE)
+static inline int mtd_nand_has_on_die(void) { return 1; }
+int nand_read_subpage_on_die(struct mtd_info *mtd, struct nand_chip *chip,
+			     uint32_t data_offs, uint32_t readlen,
+			     uint8_t *bufpoi, int page);
+int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
+			  uint8_t *buf, int oob_required, int page);
+void nand_onfi_detect_on_die_ecc(struct mtd_info *mtd);
+int nand_setup_on_die_ecc_micron(struct mtd_info *mtd, int enable);
+struct nand_on_die_control *nand_on_die_init(struct mtd_info *mtd);
+void nand_on_die_destroy(struct nand_on_die_control *ctrl);
+#else /* !CONFIG_MTD_NAND_ECC_ON_DIE: stubs so callers need no #ifdef */
+static inline int mtd_nand_has_on_die(void) { return 0; }
+static inline int nand_read_subpage_on_die(struct mtd_info *mtd,
+					   struct nand_chip *chip,
+					   uint32_t data_offs, uint32_t readlen,
+					   uint8_t *bufpoi, int page)
+{
+	BUG();	/* nand_scan_tail() refuses NAND_ECC_HW_ON_DIE in this config */
+}
+
+static inline int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
+					uint8_t *buf, int oob_required, int page)
+{
+	BUG();	/* nand_scan_tail() refuses NAND_ECC_HW_ON_DIE in this config */
+}
+static inline void nand_onfi_detect_on_die_ecc(struct mtd_info *mtd) { }
+static inline int nand_setup_on_die_ecc_micron(struct mtd_info *mtd, int enable) { return -EINVAL; }
+static inline struct nand_on_die_control *nand_on_die_init(struct mtd_info *mtd) { return NULL; }
+static inline void nand_on_die_destroy(struct nand_on_die_control *ctrl) { }
+#endif /* CONFIG_MTD_NAND_ECC_ON_DIE */
+#endif /* __MTD_NAND_ONDIE_H__ */
-- 
2.3.4




More information about the linux-mtd mailing list