[PATCH v3 10/13] spi: cadence-quadspi: enable PHY for direct reads and indirect writes
Santhosh Kumar K
s-k6 at ti.com
Mon Jun 1 01:39:58 PDT 2026
On 28/05/26 14:39, Miquel Raynal wrote:
> On 27/05/2026 at 23:25:24 +0530, Santhosh Kumar K <s-k6 at ti.com> wrote:
>
>> After PHY tuning completes, data transfers still use the default
>> read-capture path. The PHY pipeline must be activated around each
>> eligible transfer to benefit from the calibrated delay settings.
>>
>> Add cqspi_phy_enable() to toggle PHY mode. Enabling sets the calibrated
>> read-capture delay, asserts PHY_EN and PHY_PIPELINE, and decrements the
>> dummy cycle count by one since the PHY pipeline absorbs that latency.
>> Disabling reverses all three. Returns cqspi_wait_idle() so callers can
>> abort if the controller stalls on enable; disable is best-effort.
>>
>> Split cqspi_direct_read_execute() so PHY-eligible reads run DMA over the
>> 16-byte-aligned middle section with PHY active, while unaligned head and
>> tail bytes are transferred without PHY. PHY is used when use_phy is set,
>> the transfer exceeds 16 bytes, and the frequency matches the tuned rate.
>> cqspi_memcpy_fromio() handles small and non-DMA-able transfers, with
>> special handling for 8D-8D-8D to ensure 2-byte-aligned I/O accesses.
>>
>> For indirect writes, PHY is enabled for transfers of at least 1 KB
>
> kiB :-)
>
>> where the setup overhead is amortized.
>>
>> Signed-off-by: Santhosh Kumar K <s-k6 at ti.com>
>> ---
>> drivers/spi/spi-cadence-quadspi.c | 181 ++++++++++++++++++++++++++++--
>> 1 file changed, 171 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
>> index 72208d376305..80e7c572ab80 100644
>> --- a/drivers/spi/spi-cadence-quadspi.c
>> +++ b/drivers/spi/spi-cadence-quadspi.c
>> @@ -564,6 +564,61 @@ static void cqspi_readdata_capture(struct cqspi_st *cqspi, const bool bypass,
>> writel(reg, reg_base + CQSPI_REG_READCAPTURE);
>> }
>>
>> +static int cqspi_phy_enable(struct cqspi_flash_pdata *f_pdata, bool
>> enable)
>
> I'm fine with the logic, just the naming is very "TI" specific here. Can
> we name the helper "cqspi_tune_phy(f_pdata, enable)"?
>
> [...]
>
>> static int cqspi_exec_flash_cmd(struct cqspi_st *cqspi, unsigned int reg)
>> {
>> void __iomem *reg_base = cqspi->iobase;
>> @@ -1191,6 +1246,7 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>> void __iomem *reg_base = cqspi->iobase;
>> unsigned int remaining = n_tx;
>> unsigned int write_bytes;
>> + bool use_phy_write;
>> int ret;
>>
>> if (!refcount_read(&cqspi->refcount))
>> @@ -1226,6 +1282,15 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>> if (cqspi->apb_ahb_hazard)
>> readl(reg_base + CQSPI_REG_INDIRECTWR);
>>
>> + /* Use PHY only for large writes where setup overhead is amortized */
>> + use_phy_write = n_tx >= SZ_1K && f_pdata->use_phy;
>
> Maybe also "f_pdata->use_tuned_phy?
Yeah, I'll rename them in v4.
>
>> + if (use_phy_write) {
>> + ret = cqspi_phy_enable(f_pdata, true);
>> + if (ret)
>> + goto failwr;
>> + }
>> +
>> while (remaining > 0) {
>> size_t write_words, mod_bytes;
>>
>> @@ -1266,6 +1331,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>> goto failwr;
>> }
>>
>> + if (use_phy_write)
>> + cqspi_phy_enable(f_pdata, false);
>> +
>> /* Disable interrupt. */
>> writel(0, reg_base + CQSPI_REG_IRQMASK);
>>
>> @@ -1277,6 +1345,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>> return 0;
>>
>> failwr:
>> + if (use_phy_write)
>> + cqspi_phy_enable(f_pdata, false);
>> +
>> /* Disable interrupt. */
>> writel(0, reg_base + CQSPI_REG_IRQMASK);
>>
>> @@ -1448,8 +1519,15 @@ static void cqspi_rx_dma_callback(void *param)
>> complete(&cqspi->rx_dma_complete);
>> }
>>
>> -static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
>> - u_char *buf, loff_t from, size_t len)
>> +static bool cqspi_use_phy(struct cqspi_flash_pdata *f_pdata,
>> + const struct spi_mem_op *op)
>> +{
>> + return f_pdata->use_phy && op->data.nbytes > 16 &&
>
> Why is the check looking for 16 here, and 1kiB above?
Direct reads have very little per-op overhead, so enabling PHY is
beneficial even for relatively small transfers. (> 16)
Indirect writes, on the other hand, incur significantly higher setup
cost, resulting in much larger break point. (> 1kiB)
>
>> + op->max_freq == f_pdata->max_clk_rate;
>> +}
>> +
>> +static int cqspi_direct_read_dma(struct cqspi_flash_pdata *f_pdata, u_char *buf,
>> + loff_t from, size_t len)
>> {
>> struct cqspi_st *cqspi = f_pdata->cqspi;
>> struct device *dev = &cqspi->pdev->dev;
>> @@ -1461,19 +1539,14 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
>> dma_addr_t dma_dst;
>> struct device *ddev;
>>
>> - if (!cqspi->rx_chan || !virt_addr_valid(buf)) {
>> - memcpy_fromio(buf, cqspi->ahb_base + from, len);
>> - return 0;
>> - }
>
> This (and changes below) don't seem to be directly related to the PHY
> addition, could we have those changes done in a separated patch, before
> introducing PHY tuning use?
>
>> -
>> ddev = cqspi->rx_chan->device->dev;
>> dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE);
>> if (dma_mapping_error(ddev, dma_dst)) {
>> dev_err(dev, "dma mapping failed\n");
>> return -ENOMEM;
>> }
>> - tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src,
>> - len, flags);
>> + tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src, len,
>> + flags);
>
> Not related to the change, isn't it?
Yeah, not related I'll leave this untouched. However, the changes above
and below are related and belong together in the same patch.
>
>> if (!tx) {
>> dev_err(dev, "device_prep_dma_memcpy error\n");
>> ret = -EIO;
>> @@ -1507,6 +1580,94 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
>> return ret;
>> }
>>
>
> [...]
>
>> static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
>> const struct spi_mem_op *op)
>> {
>> @@ -1524,7 +1685,7 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
>>
>> if ((cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size)) ||
>> (cqspi->ddata && cqspi->ddata->quirks & CQSPI_NO_INDIRECT_MODE))
>> - return cqspi_direct_read_execute(f_pdata, buf, from, len);
>> + return cqspi_direct_read_execute(f_pdata, op);
>
> This change could also be done in a different commit.
>
Thanks,
Santhosh.
>>
>> if (cqspi->use_dma_read && ddata && ddata->indirect_read_dma &&
>> virt_addr_valid(buf) && ((dma_align & CQSPI_DMA_UNALIGN) == 0))
>
> Thanks,
> Miquèl
More information about the linux-mtd
mailing list