[RFC PATCH] mmc: dw_mmc: rework the accurate timeout calculation

Shawn Lin shawn.lin at rock-chips.com
Mon Apr 23 18:29:06 PDT 2018


Hi all,

On 2018/3/29 16:18, Shawn Lin wrote:
> The intention of this patch is to rework the timeout calculation
> for dw_mmc, which isn't ideal enough. It's harmless to add longest
> timeout as normally the transfer is fine and the occasional failure
> could break out by no matter the Hardware interrupt or software timer.
> 
> However, it doesn't make sense in the tuning process, as it could
> trigger more timeout than usual, which is very painful for booting
> time. Currently we set TMOUT to 0xffffffff anyway. Assume the max
> clock is 200MHz when doing tuning for HS200, the we have:
> 
> MSEC_PER_SEC * 0xffffff / 200MHz = 83ms for hardware timemout interrupt
> and 93ms for software timer.
> 
> However, we should note that the cards should guarantee to complete a
> sequence of 40 times CMD21 execulations within 150ms per the spec. But
> we now have 83ms timeout for each CMD21, which only contains 128Bytes
> data playload.
> 
> Maybe the simplest way is to reduce the HW/SW timeout value for tuning
> process but it's still worth trying to rework a more accurate timeout.
> 

Just as a regular share that this patch was backported to 4.4 kernel
tree by Ziyuan(cc'ed) and pass the intensive test in QA cycle. I would
still like to spin this more time to make sure it work fine by more
solid test. Will come back to send a regular patch in coming weeks.

Any comments are welcome.

> Signed-off-by: Shawn Lin <shawn.lin at rock-chips.com>
> 
> ---
> 
>   drivers/mmc/host/dw_mmc.c | 107 +++++++++++++++++++++++++++++++++-------------
>   drivers/mmc/host/dw_mmc.h |   4 ++
>   2 files changed, 82 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index 623f4d2..c85d703 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -378,23 +378,84 @@ static u32 dw_mci_prep_stop_abort(struct dw_mci *host, struct mmc_command *cmd)
>   	return cmdr;
>   }
>   
> -static inline void dw_mci_set_cto(struct dw_mci *host)
> +static void dw_mci_calc_timeout(struct dw_mci *host)
>   {
> -	unsigned int cto_clks;
> -	unsigned int cto_div;
> -	unsigned int cto_ms;
> -	unsigned long irqflags;
> +	struct mmc_data *data = host->data;
> +	struct mmc_host *mmc = host->slot->mmc;
> +	u64 trsf_time_per_blksz;
> +	u64 idle_time_per_trsf;
> +	u64 val;
> +	struct mmc_ios *ios = &mmc->ios;
> +	unsigned char bus_width = ios->bus_width;
> +	unsigned int freq, clk_div;
> +	unsigned int cto_clks, drto_clks;
> +
> +	clk_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
> +	if (clk_div == 0)
> +		clk_div = 1;
> +
> +	freq = host->bus_hz / clk_div;
> +
> +	/*
> +	 * The largest 136 bit response in 100KHz is nearly 1ms,
> +	 * and add other transfer parameter per spec, we should
> +	 * allow 2ms maybe. And should also add some spare time
> +	 * to tolerate anything unknown leading to unexpect long
> +	 * latency.
> +	 */
> +	host->cmd_timeout_ns = 10 * NSEC_PER_MSEC;
> +	host->data_timeout_ns = 10 * NSEC_PER_MSEC;
> +
> +	if (data) {
> +		trsf_time_per_blksz = DIV_ROUND_UP_ULL((u64)data->blksz *
> +					NSEC_PER_SEC * (8 / bus_width), freq);
> +
> +		/*
> +		 * Especially multiply by 2 to account for write for anything
> +		 * unknown in the cards, for each block.
> +		 */
> +		if (data->flags & MMC_DATA_WRITE)
> +			trsf_time_per_blksz *= 2;
> +
> +		/* Calculate idle time from core basd on spec */
> +		idle_time_per_trsf = DIV_ROUND_UP_ULL(data->timeout_ns,
> +						      NSEC_PER_USEC);
> +		if (data->timeout_clks) {
> +			/* Align up from cycle of MHz to Hz */
> +			val = 1000000ULL * data->timeout_clks;
> +			if (DIV_ROUND_UP_ULL(val, freq))
> +				idle_time_per_trsf++;
> +			idle_time_per_trsf += val;
> +		}
> +
> +		/* Calculate timeout for the entire data */
> +		host->data_timeout_ns +=
> +			data->blocks * (DIV_ROUND_UP_ULL(idle_time_per_trsf,
> +					NSEC_PER_USEC) + trsf_time_per_blksz);
>   
> -	cto_clks = mci_readl(host, TMOUT) & 0xff;
> -	cto_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
> -	if (cto_div == 0)
> -		cto_div = 1;
> +		drto_clks = DIV_ROUND_UP_ULL(host->data_timeout_ns * freq,
> +					     NSEC_PER_SEC);
> +		if (drto_clks > 0xffffff)
> +			drto_clks = 0xffffff;
>   
> -	cto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * cto_clks * cto_div,
> -				  host->bus_hz);
> +		/* Set accurate hw data timeout */
> +		mci_writel(host, TMOUT, drto_clks << 8);
> +	}
>   
> -	/* add a bit spare time */
> -	cto_ms += 10;
> +	host->cmd_timeout_ns += host->cmd->busy_timeout * NSEC_PER_MSEC;
> +	cto_clks = DIV_ROUND_UP_ULL(host->cmd_timeout_ns * freq,
> +				    NSEC_PER_SEC);
> +	if (cto_clks > 0xff)
> +		cto_clks = 0xff;
> +
> +	/* Set accurate cmd timeout */
> +	mci_writel(host, TMOUT, cto_clks |
> +				(mci_readl(host, TMOUT) & 0xffffff00));
> +}
> +
> +static inline void dw_mci_set_cto(struct dw_mci *host)
> +{
> +	unsigned long irqflags;
>   
>   	/*
>   	 * The durations we're working with are fairly short so we have to be
> @@ -412,7 +473,7 @@ static inline void dw_mci_set_cto(struct dw_mci *host)
>   	spin_lock_irqsave(&host->irq_lock, irqflags);
>   	if (!test_bit(EVENT_CMD_COMPLETE, &host->pending_events))
>   		mod_timer(&host->cto_timer,
> -			jiffies + msecs_to_jiffies(cto_ms) + 1);
> +			jiffies + nsecs_to_jiffies(host->cmd_timeout_ns) + 1);
>   	spin_unlock_irqrestore(&host->irq_lock, irqflags);
>   }
>   
> @@ -424,6 +485,8 @@ static void dw_mci_start_command(struct dw_mci *host,
>   		 "start command: ARGR=0x%08x CMDR=0x%08x\n",
>   		 cmd->arg, cmd_flags);
>   
> +	dw_mci_calc_timeout(host);
> +
>   	mci_writel(host, CMDARG, cmd->arg);
>   	wmb(); /* drain writebuffer */
>   	dw_mci_wait_while_busy(host, cmd_flags);
> @@ -1923,26 +1986,12 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data)
>   
>   static void dw_mci_set_drto(struct dw_mci *host)
>   {
> -	unsigned int drto_clks;
> -	unsigned int drto_div;
> -	unsigned int drto_ms;
>   	unsigned long irqflags;
>   
> -	drto_clks = mci_readl(host, TMOUT) >> 8;
> -	drto_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
> -	if (drto_div == 0)
> -		drto_div = 1;
> -
> -	drto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * drto_clks * drto_div,
> -				   host->bus_hz);
> -
> -	/* add a bit spare time */
> -	drto_ms += 10;
> -
>   	spin_lock_irqsave(&host->irq_lock, irqflags);
>   	if (!test_bit(EVENT_DATA_COMPLETE, &host->pending_events))
>   		mod_timer(&host->dto_timer,
> -			  jiffies + msecs_to_jiffies(drto_ms));
> +			  jiffies + nsecs_to_jiffies(host->data_timeout_ns));
>   	spin_unlock_irqrestore(&host->irq_lock, irqflags);
>   }
>   
> diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
> index 46e9f8e..4330128 100644
> --- a/drivers/mmc/host/dw_mmc.h
> +++ b/drivers/mmc/host/dw_mmc.h
> @@ -126,7 +126,9 @@ struct dw_mci_dma_slave {
>    * @sdio_id0: Number of slot0 in the SDIO interrupt registers.
>    * @cmd11_timer: Timer for SD3.0 voltage switch over scheme.
>    * @cto_timer: Timer for broken command transfer over scheme.
> + * @cmd_timeout_ns: Timeout value accociated with cto_timer
>    * @dto_timer: Timer for broken data transfer over scheme.
> + * @data_timeout_ns: Timeout value accociated with dto_timer
>    *
>    * Locking
>    * =======
> @@ -233,7 +235,9 @@ struct dw_mci {
>   
>   	struct timer_list       cmd11_timer;
>   	struct timer_list       cto_timer;
> +	u64                     cmd_timeout_ns;
>   	struct timer_list       dto_timer;
> +	u64                     data_timeout_ns;
>   };
>   
>   /* DMA ops for Internal/External DMAC interface */
> 




More information about the Linux-rockchip mailing list