[PATCH v3 10/13] spi: cadence-quadspi: enable PHY for direct reads and indirect writes

Santhosh Kumar K s-k6 at ti.com
Mon Jun 1 01:39:58 PDT 2026



On 28/05/26 14:39, Miquel Raynal wrote:
> On 27/05/2026 at 23:25:24 +0530, Santhosh Kumar K <s-k6 at ti.com> wrote:
> 
>> After PHY tuning completes, data transfers still use the default
>> read-capture path. The PHY pipeline must be activated around each
>> eligible transfer to benefit from the calibrated delay settings.
>>
>> Add cqspi_phy_enable() to toggle PHY mode. Enabling sets the calibrated
>> read-capture delay, asserts PHY_EN and PHY_PIPELINE, and decrements the
>> dummy cycle count by one since the PHY pipeline absorbs that latency.
>> Disabling reverses all three. Returns cqspi_wait_idle() so callers can
>> abort if the controller stalls on enable; disable is best-effort.
>>
>> Split cqspi_direct_read_execute() so PHY-eligible reads run DMA over the
>> 16-byte-aligned middle section with PHY active, while unaligned head and
>> tail bytes are transferred without PHY. PHY is used when use_phy is set,
>> the transfer exceeds 16 bytes, and the frequency matches the tuned rate.
>> cqspi_memcpy_fromio() handles small and non-DMA-able transfers, with
>> special handling for 8D-8D-8D to ensure 2-byte-aligned I/O accesses.
>>
>> For indirect writes, PHY is enabled for transfers of at least 1 KB
> 
> kiB :-)
> 
>> where the setup overhead is amortized.
>>
>> Signed-off-by: Santhosh Kumar K <s-k6 at ti.com>
>> ---
>>   drivers/spi/spi-cadence-quadspi.c | 181 ++++++++++++++++++++++++++++--
>>   1 file changed, 171 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
>> index 72208d376305..80e7c572ab80 100644
>> --- a/drivers/spi/spi-cadence-quadspi.c
>> +++ b/drivers/spi/spi-cadence-quadspi.c
>> @@ -564,6 +564,61 @@ static void cqspi_readdata_capture(struct cqspi_st *cqspi, const bool bypass,
>>   	writel(reg, reg_base + CQSPI_REG_READCAPTURE);
>>   }
>>   
>> +static int cqspi_phy_enable(struct cqspi_flash_pdata *f_pdata, bool
>> enable)
> 
> I'm fine with the logic, just the naming is very "TI" specific here. Can
> we name the helper "cqspi_tune_phy(f_pdata, enable)"?
> 
> [...]
> 
>>   static int cqspi_exec_flash_cmd(struct cqspi_st *cqspi, unsigned int reg)
>>   {
>>   	void __iomem *reg_base = cqspi->iobase;
>> @@ -1191,6 +1246,7 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>>   	void __iomem *reg_base = cqspi->iobase;
>>   	unsigned int remaining = n_tx;
>>   	unsigned int write_bytes;
>> +	bool use_phy_write;
>>   	int ret;
>>   
>>   	if (!refcount_read(&cqspi->refcount))
>> @@ -1226,6 +1282,15 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>>   	if (cqspi->apb_ahb_hazard)
>>   		readl(reg_base + CQSPI_REG_INDIRECTWR);
>>   
>> +	/* Use PHY only for large writes where setup overhead is amortized */
>> +	use_phy_write = n_tx >= SZ_1K && f_pdata->use_phy;
> 
> Maybe also "f_pdata->use_tuned_phy?

Yeah, I'll rename them in v4.

> 
>> +	if (use_phy_write) {
>> +		ret = cqspi_phy_enable(f_pdata, true);
>> +		if (ret)
>> +			goto failwr;
>> +	}
>> +
>>   	while (remaining > 0) {
>>   		size_t write_words, mod_bytes;
>>   
>> @@ -1266,6 +1331,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>>   		goto failwr;
>>   	}
>>   
>> +	if (use_phy_write)
>> +		cqspi_phy_enable(f_pdata, false);
>> +
>>   	/* Disable interrupt. */
>>   	writel(0, reg_base + CQSPI_REG_IRQMASK);
>>   
>> @@ -1277,6 +1345,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
>>   	return 0;
>>   
>>   failwr:
>> +	if (use_phy_write)
>> +		cqspi_phy_enable(f_pdata, false);
>> +
>>   	/* Disable interrupt. */
>>   	writel(0, reg_base + CQSPI_REG_IRQMASK);
>>   
>> @@ -1448,8 +1519,15 @@ static void cqspi_rx_dma_callback(void *param)
>>   	complete(&cqspi->rx_dma_complete);
>>   }
>>   
>> -static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
>> -				     u_char *buf, loff_t from, size_t len)
>> +static bool cqspi_use_phy(struct cqspi_flash_pdata *f_pdata,
>> +			  const struct spi_mem_op *op)
>> +{
>> +	return f_pdata->use_phy && op->data.nbytes > 16 &&
> 
> Why is the check looking for 16 here, and 1kiB above?

Direct reads have very little per-op overhead, so enabling PHY is
beneficial even for relatively small transfers. (> 16)

Indirect writes, on the other hand, incur significantly higher setup
cost, resulting in much larger break point. (> 1kiB)

> 
>> +	       op->max_freq == f_pdata->max_clk_rate;
>> +}
>> +
>> +static int cqspi_direct_read_dma(struct cqspi_flash_pdata *f_pdata, u_char *buf,
>> +				 loff_t from, size_t len)
>>   {
>>   	struct cqspi_st *cqspi = f_pdata->cqspi;
>>   	struct device *dev = &cqspi->pdev->dev;
>> @@ -1461,19 +1539,14 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
>>   	dma_addr_t dma_dst;
>>   	struct device *ddev;
>>   
>> -	if (!cqspi->rx_chan || !virt_addr_valid(buf)) {
>> -		memcpy_fromio(buf, cqspi->ahb_base + from, len);
>> -		return 0;
>> -	}
> 
> This (and changes below) don't seem to be directly related to the PHY
> addition, could we have those changes done in a separated patch, before
> introducing PHY tuning use?
> 
>> -
>>   	ddev = cqspi->rx_chan->device->dev;
>>   	dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE);
>>   	if (dma_mapping_error(ddev, dma_dst)) {
>>   		dev_err(dev, "dma mapping failed\n");
>>   		return -ENOMEM;
>>   	}
>> -	tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src,
>> -				       len, flags);
>> +	tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src, len,
>> +				       flags);
> 
> Not related to the change, isn't it?

Yeah, not related I'll leave this untouched. However, the changes above
and below are related and belong together in the same patch.

> 
>>   	if (!tx) {
>>   		dev_err(dev, "device_prep_dma_memcpy error\n");
>>   		ret = -EIO;
>> @@ -1507,6 +1580,94 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
>>   	return ret;
>>   }
>>   
> 
> [...]
> 
>>   static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
>>   			  const struct spi_mem_op *op)
>>   {
>> @@ -1524,7 +1685,7 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
>>   
>>   	if ((cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size)) ||
>>   	    (cqspi->ddata && cqspi->ddata->quirks & CQSPI_NO_INDIRECT_MODE))
>> -		return cqspi_direct_read_execute(f_pdata, buf, from, len);
>> +		return cqspi_direct_read_execute(f_pdata, op);
> 
> This change could also be done in a different commit.
> 

Thanks,
Santhosh.

>>   
>>   	if (cqspi->use_dma_read && ddata && ddata->indirect_read_dma &&
>>   	    virt_addr_valid(buf) && ((dma_align & CQSPI_DMA_UNALIGN) == 0))
> 
> Thanks,
> Miquèl




More information about the linux-mtd mailing list