[PATCH 1/2] fsl-quadspi: fix QUAD read, add NORMAL, DUAL and FAST reads
Han Xu
xhnjupt at gmail.com
Wed Sep 28 13:06:52 PDT 2016
On Tue, Sep 27, 2016 at 07:59:56AM +0200, Albert ARIBAUD (3ADEV) wrote:
> Fix QUAD read LUT sequence missing JMP_ON_CS which caused
> read corruptions with non-1K-size reads on BK4R1 machine.
>
> Add NORMAL, DUAL and FAST read sequences.
>
> Introduce spi-bus-width property for bus subnodes, to
> specify per-bus capability to use NORMAL, FAST, DUAL,
> and/or QUAD reads.
>
> Also:
>
> Simplify ioremap/memcpy usage.
>
> Make controller clock frequency equal to the lowest of
> subnode max frequencies instead of the last one.
>
> Limit Vybrid to use only half its RX fifo, using more will
> cause corrupt reads.
>
Too much changes in one patch, need to split to several patches.
> Signed-off-by: Albert ARIBAUD (3ADEV) <albert.aribaud at 3adev.fr>
> ---
> drivers/mtd/spi-nor/fsl-quadspi.c | 204 ++++++++++++++++++++++++++++----------
> 1 file changed, 152 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
> index 5c82e4e..9811cf2 100644
> --- a/drivers/mtd/spi-nor/fsl-quadspi.c
> +++ b/drivers/mtd/spi-nor/fsl-quadspi.c
> @@ -41,6 +41,8 @@
> #define QUADSPI_QUIRK_TKT253890 (1 << 2)
> /* Controller cannot wake up from wait mode, TKT245618 */
> #define QUADSPI_QUIRK_TKT245618 (1 << 3)
> +/* Controller can only read half a rx fifo through AHB */
> +#define QUADSPI_QUIRK_AHB_READ_HALF_FIFO (1 << 4)
>
> /* The registers */
> #define QUADSPI_MCR 0x00
> @@ -117,6 +119,10 @@
> #define QUADSPI_FR 0x160
> #define QUADSPI_FR_TFF_MASK 0x1
>
> +#define QUADSPI_SPTRCLR 0x15c
> +#define QUADSPI_SPTRCLR_BFPTRC_SHIFT 0
> +#define QUADSPI_SPTRCLR_BFPTRC_MASK (0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
> +
offset 0x15c is SR register for i.MX, not sure where does SPTRCLR come from
> #define QUADSPI_SFA1AD 0x180
> #define QUADSPI_SFA2AD 0x184
> #define QUADSPI_SFB1AD 0x188
> @@ -193,7 +199,7 @@
> #define QUADSPI_LUT_NUM 64
>
> /* SEQID -- we can have 16 seqids at most. */
> -#define SEQID_QUAD_READ 0
> +#define SEQID_READ 0
> #define SEQID_WREN 1
> #define SEQID_WRDI 2
> #define SEQID_RDSR 3
> @@ -205,6 +211,9 @@
> #define SEQID_RDCR 9
> #define SEQID_EN4B 10
> #define SEQID_BRWR 11
> +#define SEQID_FAST_READ 12
> +#define SEQID_DUAL_READ 13
> +#define SEQID_QUAD_READ 14
>
> #define QUADSPI_MIN_IOMAP SZ_4M
>
> @@ -229,7 +238,8 @@ static struct fsl_qspi_devtype_data vybrid_data = {
> .rxfifo = 128,
> .txfifo = 64,
> .ahb_buf_size = 1024,
> - .driver_data = QUADSPI_QUIRK_SWAP_ENDIAN,
> + .driver_data = QUADSPI_QUIRK_SWAP_ENDIAN
> + | QUADSPI_QUIRK_AHB_READ_HALF_FIFO,
> };
>
> static struct fsl_qspi_devtype_data imx6sx_data = {
> @@ -309,6 +319,11 @@ static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
> return q->devtype_data->driver_data & QUADSPI_QUIRK_TKT245618;
> }
>
> +static inline int needs_half_fifo(struct fsl_qspi *q)
> +{
> + return q->devtype_data->driver_data & QUADSPI_QUIRK_AHB_READ_HALF_FIFO;
> +}
> +
> /*
> * R/W functions for big- or little-endian registers:
> * The qSPI controller's endian is independent of the CPU core's endian.
> @@ -373,8 +388,20 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
> void __iomem *base = q->iobase;
> int rxfifo = q->devtype_data->rxfifo;
> u32 lut_base;
> - u8 cmd, addrlen, dummy;
> + u8 cmd, addrlen;
> int i;
> + /* use 8 dummy cycles for all fast operations */
> + const int dummy = 8;
> +
> + /* use only half fifo if controller needs that */
> + if (needs_half_fifo(q))
> + rxfifo /= 2;
> +
> + /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
> + if (q->nor_size <= SZ_16M)
> + addrlen = ADDR24BIT;
> + else
> + addrlen = ADDR32BIT;
A better patch fetch info from nor structure, refer to
https://patchwork.ozlabs.org/patch/613429/
>
> fsl_qspi_unlock_lut(q);
>
> @@ -382,24 +409,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
> for (i = 0; i < QUADSPI_LUT_NUM; i++)
> qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
>
> - /* Quad Read */
> - lut_base = SEQID_QUAD_READ * 4;
> -
> - if (q->nor_size <= SZ_16M) {
> - cmd = SPINOR_OP_READ_1_1_4;
> - addrlen = ADDR24BIT;
> - dummy = 8;
> - } else {
> - /* use the 4-byte address */
> - cmd = SPINOR_OP_READ_1_1_4;
> - addrlen = ADDR32BIT;
> - dummy = 8;
> - }
> -
> - qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> - base + QUADSPI_LUT(lut_base));
> - qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> - base + QUADSPI_LUT(lut_base + 1));
> + /* Read */
> + lut_base = SEQID_READ * 4;
> + qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ) | LUT1(ADDR, PAD1, addrlen),
> + base + QUADSPI_LUT(lut_base));
> + qspi_writel(q, LUT0(FSL_READ, PAD1, rxfifo) | LUT1(JMP_ON_CS, PAD1, SEQID_READ * 8),
> + base + QUADSPI_LUT(lut_base + 1));
>
> /* Write enable */
> lut_base = SEQID_WREN * 4;
> @@ -409,16 +424,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
> /* Page Program */
> lut_base = SEQID_PP * 4;
>
> - if (q->nor_size <= SZ_16M) {
> - cmd = SPINOR_OP_PP;
> - addrlen = ADDR24BIT;
> - } else {
> - /* use the 4-byte address */
> - cmd = SPINOR_OP_PP;
> - addrlen = ADDR32BIT;
> - }
> -
> - qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> + qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_PP) | LUT1(ADDR, PAD1, addrlen),
> base + QUADSPI_LUT(lut_base));
> qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
> base + QUADSPI_LUT(lut_base + 1));
> @@ -431,7 +437,6 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>
> /* Erase a sector */
> lut_base = SEQID_SE * 4;
> -
> cmd = q->nor[0].erase_opcode;
> addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
>
> @@ -476,6 +481,33 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
> qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
> base + QUADSPI_LUT(lut_base));
>
> + /* Fast Read */
> + lut_base = SEQID_FAST_READ * 4;
> + qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_FAST) | LUT1(ADDR, PAD1, addrlen),
> + base + QUADSPI_LUT(lut_base));
> + qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD1, rxfifo),
> + base + QUADSPI_LUT(lut_base + 1));
> + qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_FAST_READ * 8),
> + base + QUADSPI_LUT(lut_base + 2));
> +
> + /* Dual Read */
> + lut_base = SEQID_DUAL_READ * 4;
> + qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_2) | LUT1(ADDR, PAD1, addrlen),
> + base + QUADSPI_LUT(lut_base));
> + qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD2, rxfifo),
> + base + QUADSPI_LUT(lut_base + 1));
> + qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_DUAL_READ * 8),
> + base + QUADSPI_LUT(lut_base + 2));
> +
> + /* Quad Read */
> + lut_base = SEQID_QUAD_READ * 4;
> + qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_4) | LUT1(ADDR, PAD1, addrlen),
> + base + QUADSPI_LUT(lut_base));
> + qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> + base + QUADSPI_LUT(lut_base + 1));
> + qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_QUAD_READ * 8),
> + base + QUADSPI_LUT(lut_base + 2));
> +
> fsl_qspi_lock_lut(q);
> }
>
> @@ -483,8 +515,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
> static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
> {
> switch (cmd) {
> - case SPINOR_OP_READ_1_1_4:
> - return SEQID_QUAD_READ;
> + case SPINOR_OP_READ:
> + return SEQID_READ;
> case SPINOR_OP_WREN:
> return SEQID_WREN;
> case SPINOR_OP_WRDI:
> @@ -507,6 +539,12 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
> return SEQID_EN4B;
> case SPINOR_OP_BRWR:
> return SEQID_BRWR;
> + case SPINOR_OP_READ_FAST:
> + return SEQID_FAST_READ;
> + case SPINOR_OP_READ_1_1_2:
> + return SEQID_DUAL_READ;
> + case SPINOR_OP_READ_1_1_4:
> + return SEQID_QUAD_READ;
> default:
> if (cmd == q->nor[0].erase_opcode)
> return SEQID_SE;
> @@ -676,11 +714,16 @@ static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
> * the memcpy to read the data directly. A "missed" access to the buffer
> * causes the controller to clear the buffer, and use the sequence pointed
> * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
> + *
> + * When using two ports, the SEQID to use for one port might differ from
> + * the one to use for the other (e.g., if one port can do 4-pad reads but
> + * the other cannot). So we set up a basic mode here (SEQID_READ) and we
> + * will set up the proper SEQID for the port right before doing the AHB
> + * access(es).
> */
It's rare to find this use case, considering the amount of lut is only 16,
please don't add two many luts in this way. I will send a patch soon for
dynamic lut change, please add these extra luts in dynamic lut list.
> static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
> {
> void __iomem *base = q->iobase;
> - int seqid;
>
> /* AHB configuration for access buffer 0/1/2 .*/
> qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
> @@ -688,11 +731,13 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
> qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
> /*
> * Set ADATSZ with the maximum AHB buffer size to improve the
> - * read performance.
> + * read performance, except when the controller should not use
> + * more than half its RX fifo in AHB reads, in which case read
> + * size is given in the LUT FSL_READ instructions.
> */
> qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
> - ((q->devtype_data->ahb_buf_size / 8)
> - << QUADSPI_BUF3CR_ADATSZ_SHIFT),
> + (needs_half_fifo(q)? 0 : q->devtype_data->ahb_buf_size / 8)
> + << QUADSPI_BUF3CR_ADATSZ_SHIFT,
> base + QUADSPI_BUF3CR);
>
> /* We only use the buffer3 */
> @@ -700,9 +745,8 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
> qspi_writel(q, 0, base + QUADSPI_BUF1IND);
> qspi_writel(q, 0, base + QUADSPI_BUF2IND);
>
> - /* Set the default lut sequence for AHB Read. */
> - seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
> - qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> + /* Set the default lut sequence for AHB Read to READ. */
> + qspi_writel(q, SEQID_READ << QUADSPI_BFGENCR_SEQID_SHIFT,
> q->iobase + QUADSPI_BFGENCR);
> }
>
> @@ -841,6 +885,7 @@ static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
> return ret;
>
> fsl_qspi_read_data(q, len, buf);
> +
> return 0;
> }
>
> @@ -887,11 +932,27 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
> {
> struct fsl_qspi *q = nor->priv;
> u8 cmd = nor->read_opcode;
> + uint32_t seqid;
> + size_t qlen = q->nor_size * 4;
> + int nor_idx = nor - q->nor;
> + size_t nor_ofs = q->nor_size * nor_idx;
> +
> + /* Set the actual lut sequence for AHB Read from the considered nor. */
> + seqid = fsl_qspi_get_seqid(q, nor->read_opcode);
> + qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> + q->iobase + QUADSPI_BFGENCR);
> + /* Reset the AHB sequence pointer */
> + qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC_MASK, q->iobase + QUADSPI_SPTRCLR);
> + /* make sure the Rx buffer is read through AHB, not IP */
> + qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, q->iobase + QUADSPI_RBCT);
> +
> + /* set the chip address for READID */
> + fsl_qspi_set_base_addr(q, q->nor);
>
> /* if necessary,ioremap buffer before AHB read, */
> if (!q->ahb_addr) {
> - q->memmap_offs = q->chip_base_addr + from;
> - q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> + q->memmap_offs = q->chip_base_addr;
> + q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>
> q->ahb_addr = ioremap_nocache(
> q->memmap_phy + q->memmap_offs,
> @@ -906,8 +967,8 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
> q->memmap_offs + q->memmap_len) {
> iounmap(q->ahb_addr);
>
> - q->memmap_offs = q->chip_base_addr + from;
> - q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> + q->memmap_offs = q->chip_base_addr;
> + q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
> q->ahb_addr = ioremap_nocache(
> q->memmap_phy + q->memmap_offs,
> q->memmap_len);
> @@ -917,13 +978,11 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
> }
> }
>
> - dev_dbg(q->dev, "cmd [%x],read from %p, len:%zd\n",
> - cmd, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> - len);
> + dev_dbg(q->dev, "cmd [%x], read from %p, len:%zd\n",
> + cmd, q->ahb_addr + nor_ofs + from, len);
>
> /* Read out the data directly from the AHB buffer.*/
> - memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> - len);
> + memcpy(buf, q->ahb_addr + nor_ofs + from, len);
No much diff from the previous implementation
>
> return len;
> }
> @@ -980,6 +1039,11 @@ static int fsl_qspi_probe(struct platform_device *pdev)
> struct spi_nor *nor;
> struct mtd_info *mtd;
> int ret, i = 0;
> + const struct of_device_id *of_id =
> + of_match_device(fsl_qspi_dt_ids, &pdev->dev);
> + enum read_mode readmode;
> + unsigned int buswidth;
> + u32 qspimaxfreq, spimaxfreq;
>
> q = devm_kzalloc(dev, sizeof(*q), GFP_KERNEL);
> if (!q)
> @@ -1050,6 +1114,9 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>
> mutex_init(&q->lock);
>
> + /* Running MIN of all "spi-max-frequency' values present in subnodes */
> + qspimaxfreq = INT_MAX;
> +
> /* iterate the subnodes. */
> for_each_available_child_of_node(dev->of_node, np) {
> /* skip the holes */
> @@ -1073,15 +1140,44 @@ static int fsl_qspi_probe(struct platform_device *pdev)
> nor->prepare = fsl_qspi_prep;
> nor->unprepare = fsl_qspi_unprep;
>
> + /* Update running MIN of subnode max frequencies */
> ret = of_property_read_u32(np, "spi-max-frequency",
> - &q->clk_rate);
> + &spimaxfreq);
> if (ret < 0)
> goto mutex_failed;
> + qspimaxfreq = (qspimaxfreq > spimaxfreq) ? spimaxfreq: qspimaxfreq;
>
> /* set the chip address for READID */
> fsl_qspi_set_base_addr(q, nor);
>
> - ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
> + /* scan using the proper read mode for this subnode */
> + if (of_property_read_u32(np, "spi-bus-width", &buswidth)<0) {
> + nor->read_opcode = SPINOR_OP_READ;
> + readmode = SPI_NOR_NORMAL;
> + } else {
> + switch (buswidth) {
> + case 4:
> + nor->read_opcode = SPINOR_OP_READ_1_1_4;
> + readmode = SPI_NOR_QUAD;
> + break;
> + case 2:
> + nor->read_opcode = SPINOR_OP_READ_1_1_2;
> + readmode = SPI_NOR_DUAL;
> + break;
> + case 1:
> + nor->read_opcode = SPINOR_OP_READ_FAST;
> + readmode = SPI_NOR_FAST;
> + break;
> + default:
> + dev_warn(q->dev,
> + "ignoring spi-bus-width=%d (should be 1, 2 or 4)",
> + buswidth);
> + nor->read_opcode = SPINOR_OP_READ;
> + readmode = SPI_NOR_NORMAL;
> + }
> + }
> +
> + ret = spi_nor_scan(nor, NULL, readmode);
Not necessary, can fetch info from nor structure
https://patchwork.ozlabs.org/patch/613429/
> if (ret)
> goto mutex_failed;
>
> @@ -1112,6 +1208,10 @@ static int fsl_qspi_probe(struct platform_device *pdev)
> i++;
> }
>
> + /* If we know the lowest subnode frequency, override driver default. */
> + if (qspimaxfreq<INT_MAX)
> + q->clk_rate = qspimaxfreq;
> +
> /* finish the rest init. */
> ret = fsl_qspi_nor_setup_last(q);
> if (ret)
> --
> 2.9.3
>
>
> ______________________________________________________
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/
More information about the linux-mtd
mailing list