[PATCH v1 1/2] mmc: mediatek: add support for 64-steps tuning

Chaotian Jing (井朝天) Chaotian.Jing at mediatek.com
Tue Nov 28 17:39:12 PST 2023


On Fri, 2023-11-24 at 15:08 +0800, Axe Yang wrote:
> Previously, during the MSDC calibration process, a full clock cycle
> actually not be covered, which in some cases didn't yield the best
> results and could cause CRC errors. This problem is particularly
> evident when MSDC is used as an SDIO host. To solve this issue, the
> tuning step should be extended from 32 to 64. By using 64-steps
> tuning, a complete clock cycle can be covered.
> 
> To illustrate, when tuning 32-steps, if the obtained window has a
> hole
> near the middle, like this: 0xffc07ff (hex), then the selected delay
> will be the 6 (counting from right to left).
> 
> (32 <- 1)
> 1111 1111 1100 0000 0000 0111 11(1)1 1111
> 
> However, if we tune 64-steps, the window obtained may look like this:
> 0xfffffffffffc07ff. The final selected delay will be 44, which is
> safer as it is further away from the hole:
> 
> (64 <- 1)
> 1111 ... (1)111 1111 1111 1111 1111 1100 0000 0000 0111 1111 1111
> 
> In this case, delay 6 selected through 32-steps tuning is obviously
> not optimal, and this delay is closer to the hole, using it would
> easily cause CRC problems.
> 
> You will need to add property "mediatek,tune-64-steps" in MSDC dts
> node to enable the feature.
> 
> Signed-off-by: Axe Yang <axe.yang at mediatek.com>
> ---
>  drivers/mmc/host/mtk-sd.c | 133 +++++++++++++++++++++++++++---------
> --
>  1 file changed, 97 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> index 97f7c3d4be6e..aa955240e441 100644
> --- a/drivers/mmc/host/mtk-sd.c
> +++ b/drivers/mmc/host/mtk-sd.c
> @@ -252,12 +252,16 @@
>  
>  #define MSDC_PAD_TUNE_DATWRDLY	  GENMASK(4, 0)		/* RW
> */
>  #define MSDC_PAD_TUNE_DATRRDLY	  GENMASK(12, 8)	/* RW */
> +#define MSDC_PAD_TUNE_DATRRDLY2	  GENMASK(12, 8)	/* RW */
>  #define MSDC_PAD_TUNE_CMDRDLY	  GENMASK(20, 16)	/* RW */
> +#define MSDC_PAD_TUNE_CMDRDLY2	  GENMASK(20, 16)	/* RW */
>  #define MSDC_PAD_TUNE_CMDRRDLY	  GENMASK(26, 22)	/* RW */
>  #define MSDC_PAD_TUNE_CLKTDLY	  GENMASK(31, 27)	/* RW */
>  #define MSDC_PAD_TUNE_RXDLYSEL	  BIT(15)   /* RW */
>  #define MSDC_PAD_TUNE_RD_SEL	  BIT(13)   /* RW */
>  #define MSDC_PAD_TUNE_CMD_SEL	  BIT(21)   /* RW */
> +#define MSDC_PAD_TUNE_RD2_SEL	  BIT(13)   /* RW */
> +#define MSDC_PAD_TUNE_CMD2_SEL	  BIT(21)   /* RW */
>  
>  #define PAD_DS_TUNE_DLY_SEL       BIT(0)	  /* RW */
>  #define PAD_DS_TUNE_DLY1	  GENMASK(6, 2)   /* RW */
> @@ -325,7 +329,8 @@
>  
>  #define DEFAULT_DEBOUNCE	(8)	/* 8 cycles CD debounce */
>  
> -#define PAD_DELAY_MAX	32 /* PAD delay cells */
> +#define PAD_DELAY_HALF	32 /* PAD delay cells */
> +#define PAD_DELAY_FULL	64
>  /*----------------------------------------------------------------
> ----------*/
>  /* Descriptor
> Structure                                                     */
>  /*----------------------------------------------------------------
> ----------*/
> @@ -465,6 +470,7 @@ struct msdc_host {
>  				 /* cmd response sample selection for
> HS400 */
>  	bool hs400_mode;	/* current eMMC will run at hs400 mode */
>  	bool hs400_tuning;	/* hs400 mode online tuning */
> +	bool tune_64steps;
>  	bool internal_cd;	/* Use internal card-detect logic */
>  	bool cqhci;		/* support eMMC hw cmdq */
>  	struct msdc_save_para save_para; /* used when gate HCLK */
> @@ -1615,7 +1621,7 @@ static irqreturn_t msdc_cmdq_irq(struct
> msdc_host *host, u32 intsts)
>  	}
>  
>  	if (cmd_err || dat_err) {
> -		dev_err(host->dev, "cmd_err = %d, dat_err =%d, intsts =
> 0x%x",
> +		dev_err(host->dev, "cmd_err = %d, dat_err = %d, intsts
> = 0x%x",
>  			cmd_err, dat_err, intsts);
>  	}
>  
> @@ -1780,10 +1786,20 @@ static void msdc_init_hw(struct msdc_host
> *host)
>  				     DATA_K_VALUE_SEL);
>  			sdr_set_bits(host->top_base + EMMC_TOP_CMD,
>  				     PAD_CMD_RD_RXDLY_SEL);
> +			if (host->tune_64steps) {
> +				sdr_set_bits(host->top_base +
> EMMC_TOP_CONTROL,
> +					     PAD_DAT_RD_RXDLY2_SEL);
> +				sdr_set_bits(host->top_base +
> EMMC_TOP_CMD,
> +					     PAD_CMD_RD_RXDLY2_SEL);
> +			}
>  		} else {
>  			sdr_set_bits(host->base + tune_reg,
>  				     MSDC_PAD_TUNE_RD_SEL |
>  				     MSDC_PAD_TUNE_CMD_SEL);
> +			if (host->tune_64steps)
> +				sdr_set_bits(host->base + tune_reg + 4,
> +					     MSDC_PAD_TUNE_RD2_SEL |
> +					     MSDC_PAD_TUNE_CMD2_SEL);
>  		}
>  	} else {
>  		/* choose clock tune */
> @@ -1925,24 +1941,24 @@ static void msdc_ops_set_ios(struct mmc_host
> *mmc, struct mmc_ios *ios)
>  		msdc_set_mclk(host, ios->timing, ios->clock);
>  }
>  
> -static u32 test_delay_bit(u32 delay, u32 bit)
> +static u64 test_delay_bit(u64 delay, u32 bit)
>  {
> -	bit %= PAD_DELAY_MAX;
> +	bit %= PAD_DELAY_FULL;
>  	return delay & BIT(bit);
Since you are testing > 32 bits, you should use BIT_ULL() to instead of
BIT().
>  }
>  
> -static int get_delay_len(u32 delay, u32 start_bit)
> +static int get_delay_len(u64 delay, u32 start_bit)
>  {
>  	int i;
>  
> -	for (i = 0; i < (PAD_DELAY_MAX - start_bit); i++) {
> +	for (i = 0; i < (PAD_DELAY_FULL - start_bit); i++) {
>  		if (test_delay_bit(delay, start_bit + i) == 0)
>  			return i;
>  	}
> -	return PAD_DELAY_MAX - start_bit;
> +	return PAD_DELAY_FULL - start_bit;
>  }
>  
> -static struct msdc_delay_phase get_best_delay(struct msdc_host
> *host, u32 delay)
> +static struct msdc_delay_phase get_best_delay(struct msdc_host
> *host, u64 delay)
>  {
>  	int start = 0, len = 0;
>  	int start_final = 0, len_final = 0;
> @@ -1950,28 +1966,28 @@ static struct msdc_delay_phase
> get_best_delay(struct msdc_host *host, u32 delay)
>  	struct msdc_delay_phase delay_phase = { 0, };
>  
>  	if (delay == 0) {
> -		dev_err(host->dev, "phase error: [map:%x]\n", delay);
> +		dev_err(host->dev, "phase error: [map:%llx]\n", delay);
>  		delay_phase.final_phase = final_phase;
>  		return delay_phase;
>  	}
>  
> -	while (start < PAD_DELAY_MAX) {
> +	while (start < PAD_DELAY_FULL) {
>  		len = get_delay_len(delay, start);
>  		if (len_final < len) {
>  			start_final = start;
>  			len_final = len;
>  		}
>  		start += len ? len : 1;
> -		if (len >= 12 && start_final < 4)
> +		if (!upper_32_bits(delay) && len >= 12 && start_final <
> 4)
>  			break;
>  	}
>  
>  	/* The rule is that to find the smallest delay cell */
>  	if (start_final == 0)
> -		final_phase = (start_final + len_final / 3) %
> PAD_DELAY_MAX;
> +		final_phase = (start_final + len_final / 3) %
> PAD_DELAY_FULL;
>  	else
> -		final_phase = (start_final + len_final / 2) %
> PAD_DELAY_MAX;
> -	dev_dbg(host->dev, "phase: [map:%x] [maxlen:%d] [final:%d]\n",
> +		final_phase = (start_final + len_final / 2) %
> PAD_DELAY_FULL;
> +	dev_dbg(host->dev, "phase: [map:%llx] [maxlen:%d]
> [final:%d]\n",
>  		delay, len_final, final_phase);
>  
>  	delay_phase.maxlen = len_final;
> @@ -1984,24 +2000,62 @@ static inline void msdc_set_cmd_delay(struct
> msdc_host *host, u32 value)
>  {
>  	u32 tune_reg = host->dev_comp->pad_tune_reg;
>  
> -	if (host->top_base)
> -		sdr_set_field(host->top_base + EMMC_TOP_CMD,
> PAD_CMD_RXDLY,
> -			      value);
> -	else
> -		sdr_set_field(host->base + tune_reg,
> MSDC_PAD_TUNE_CMDRDLY,
> -			      value);
> +	if (host->top_base) {
> +		if (value < PAD_DELAY_HALF) {
> +			sdr_set_field(host->top_base + EMMC_TOP_CMD,
> PAD_CMD_RXDLY,
> +				      value);
> +			sdr_set_field(host->top_base + EMMC_TOP_CMD,
> PAD_CMD_RXDLY2,
> +				      0);
> +		} else {
> +			sdr_set_field(host->top_base + EMMC_TOP_CMD,
> PAD_CMD_RXDLY,
> +				      PAD_DELAY_HALF - 1);
> +			sdr_set_field(host->top_base + EMMC_TOP_CMD,
> PAD_CMD_RXDLY2,
> +				      value - PAD_DELAY_HALF);
> +		}
> +	} else {
> +		if (value < PAD_DELAY_HALF) {
> +			sdr_set_field(host->base + tune_reg,
> MSDC_PAD_TUNE_CMDRDLY,
> +				      value);
> +			sdr_set_field(host->base + tune_reg + 4,
> MSDC_PAD_TUNE_CMDRDLY2,
> +				      0);
> +		} else {
> +			sdr_set_field(host->base + tune_reg,
> MSDC_PAD_TUNE_CMDRDLY,
> +				      PAD_DELAY_HALF - 1);
> +			sdr_set_field(host->base + tune_reg + 4,
> MSDC_PAD_TUNE_CMDRDLY2,
> +				      value - PAD_DELAY_HALF);
> +		}
> +	}
>  }
>  
>  static inline void msdc_set_data_delay(struct msdc_host *host, u32
> value)
>  {
>  	u32 tune_reg = host->dev_comp->pad_tune_reg;
>  
> -	if (host->top_base)
> -		sdr_set_field(host->top_base + EMMC_TOP_CONTROL,
> -			      PAD_DAT_RD_RXDLY, value);
> -	else
> -		sdr_set_field(host->base + tune_reg,
> MSDC_PAD_TUNE_DATRRDLY,
> -			      value);
> +	if (host->top_base) {
> +		if (value < PAD_DELAY_HALF) {
> +			sdr_set_field(host->top_base +
> EMMC_TOP_CONTROL,
> +				      PAD_DAT_RD_RXDLY, value);
> +			sdr_set_field(host->top_base +
> EMMC_TOP_CONTROL,
> +				      PAD_DAT_RD_RXDLY2, 0);
> +		} else {
> +			sdr_set_field(host->top_base +
> EMMC_TOP_CONTROL,
> +				      PAD_DAT_RD_RXDLY, PAD_DELAY_HALF
> - 1);
> +			sdr_set_field(host->top_base +
> EMMC_TOP_CONTROL,
> +				      PAD_DAT_RD_RXDLY2, value -
> PAD_DELAY_HALF);
> +		}
> +	} else {
> +		if (value < PAD_DELAY_HALF) {
> +			sdr_set_field(host->base + tune_reg,
> MSDC_PAD_TUNE_DATRRDLY,
> +				      value);
> +			sdr_set_field(host->base + tune_reg + 4,
> MSDC_PAD_TUNE_DATRRDLY2,
> +				      0);
> +		} else {
> +			sdr_set_field(host->base + tune_reg,
> MSDC_PAD_TUNE_DATRRDLY,
> +				      PAD_DELAY_HALF - 1);
> +			sdr_set_field(host->base + tune_reg + 4,
> MSDC_PAD_TUNE_DATRRDLY2,
> +				      value - PAD_DELAY_HALF);
> +		}
> +	}
>  }
>  
>  static int msdc_tune_response(struct mmc_host *mmc, u32 opcode)
> @@ -2023,7 +2077,7 @@ static int msdc_tune_response(struct mmc_host
> *mmc, u32 opcode)
>  			      host->hs200_cmd_int_delay);
>  
>  	sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> -	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0 ; i < PAD_DELAY_HALF; i++) {
>  		msdc_set_cmd_delay(host, i);
>  		/*
>  		 * Using the same parameters, it may sometimes pass the
> test,
> @@ -2047,7 +2101,7 @@ static int msdc_tune_response(struct mmc_host
> *mmc, u32 opcode)
>  		goto skip_fall;
>  
>  	sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> -	for (i = 0; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0; i < PAD_DELAY_HALF; i++) {
>  		msdc_set_cmd_delay(host, i);
>  		/*
>  		 * Using the same parameters, it may sometimes pass the
> test,
> @@ -2082,7 +2136,7 @@ static int msdc_tune_response(struct mmc_host
> *mmc, u32 opcode)
>  	if (host->dev_comp->async_fifo || host->hs200_cmd_int_delay)
>  		goto skip_internal;
>  
> -	for (i = 0; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0; i < PAD_DELAY_HALF; i++) {
>  		sdr_set_field(host->base + tune_reg,
>  			      MSDC_PAD_TUNE_CMDRRDLY, i);
>  		mmc_send_tuning(mmc, opcode, &cmd_err);
> @@ -2121,7 +2175,7 @@ static int hs400_tune_response(struct mmc_host
> *mmc, u32 opcode)
>  		sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
>  	else
>  		sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> -	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0 ; i < PAD_DELAY_HALF; i++) {
>  		sdr_set_field(host->base + PAD_CMD_TUNE,
>  			      PAD_CMD_TUNE_RX_DLY3, i);
>  		/*
> @@ -2160,7 +2214,7 @@ static int msdc_tune_data(struct mmc_host *mmc,
> u32 opcode)
>  		      host->latch_ck);
>  	sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_DSPL);
>  	sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_W_DSPL);
> -	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0 ; i < PAD_DELAY_HALF; i++) {
>  		msdc_set_data_delay(host, i);
>  		ret = mmc_send_tuning(mmc, opcode, NULL);
>  		if (!ret)
> @@ -2174,7 +2228,7 @@ static int msdc_tune_data(struct mmc_host *mmc,
> u32 opcode)
>  
>  	sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_DSPL);
>  	sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_W_DSPL);
> -	for (i = 0; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0; i < PAD_DELAY_HALF; i++) {
>  		msdc_set_data_delay(host, i);
>  		ret = mmc_send_tuning(mmc, opcode, NULL);
>  		if (!ret)
> @@ -2206,10 +2260,11 @@ static int msdc_tune_data(struct mmc_host
> *mmc, u32 opcode)
>  static int msdc_tune_together(struct mmc_host *mmc, u32 opcode)
>  {
>  	struct msdc_host *host = mmc_priv(mmc);
> -	u32 rise_delay = 0, fall_delay = 0;
> +	u64 rise_delay = 0, fall_delay = 0;
>  	struct msdc_delay_phase final_rise_delay, final_fall_delay = {
> 0,};
>  	u8 final_delay, final_maxlen;
>  	int i, ret;
> +	int tune_steps = host->tune_64steps ? PAD_DELAY_FULL :
> PAD_DELAY_HALF;
>  
>  	sdr_set_field(host->base + MSDC_PATCH_BIT,
> MSDC_INT_DAT_LATCH_CK_SEL,
>  		      host->latch_ck);
> @@ -2217,7 +2272,7 @@ static int msdc_tune_together(struct mmc_host
> *mmc, u32 opcode)
>  	sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
>  	sdr_clr_bits(host->base + MSDC_IOCON,
>  		     MSDC_IOCON_DSPL | MSDC_IOCON_W_DSPL);
> -	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0 ; i < tune_steps; i++) {
>  		msdc_set_cmd_delay(host, i);
>  		msdc_set_data_delay(host, i);
>  		ret = mmc_send_tuning(mmc, opcode, NULL);
> @@ -2233,7 +2288,7 @@ static int msdc_tune_together(struct mmc_host
> *mmc, u32 opcode)
>  	sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
>  	sdr_set_bits(host->base + MSDC_IOCON,
>  		     MSDC_IOCON_DSPL | MSDC_IOCON_W_DSPL);
> -	for (i = 0; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0; i < tune_steps; i++) {
>  		msdc_set_cmd_delay(host, i);
>  		msdc_set_data_delay(host, i);
>  		ret = mmc_send_tuning(mmc, opcode, NULL);
> @@ -2346,7 +2401,7 @@ static int msdc_execute_hs400_tuning(struct
> mmc_host *mmc, struct mmc_card *card
>  	}
>  
>  	host->hs400_tuning = true;
> -	for (i = 0; i < PAD_DELAY_MAX; i++) {
> +	for (i = 0; i < PAD_DELAY_HALF; i++) {
>  		if (host->top_base)
>  			sdr_set_field(host->top_base +
> EMMC50_PAD_DS_TUNE,
>  				      PAD_DS_DLY1, i);
> @@ -2601,6 +2656,12 @@ static void msdc_of_property_parse(struct
> platform_device *pdev,
>  	else
>  		host->hs400_cmd_resp_sel_rising = false;
>  
> +	if (of_property_read_bool(pdev->dev.of_node,
> +				  "mediatek,tune-64-steps"))
> +		host->tune_64steps = true;
> +	else
> +		host->tune_64steps = false;
> +
>  	if (of_property_read_bool(pdev->dev.of_node,
>  				  "supports-cqe"))
>  		host->cqhci = true;


More information about the linux-arm-kernel mailing list