[PATCH] mmc: meson-gx: add support for descriptor chain mode
Ulf Hansson
ulf.hansson at linaro.org
Tue Apr 18 12:15:37 PDT 2017
On 7 April 2017 at 21:22, Heiner Kallweit <hkallweit1 at gmail.com> wrote:
> So far a bounce buffer is used to serialize the scatterlist(s).
> This overhead can be avoided by switching to descriptor chain mode.
> As a result, performance is drastically improved: on an Odroid-C2 with
> a 128 GB eMMC module, raw reads reach 140 MB/s.
>
> A prerequisite for descriptor chain mode is that all scatterlist buffers
> are 8-byte aligned for 64-bit DMA. That's not always the case; at least
> the brcmfmac SDIO WiFi driver is known to cause problems.
>
> Therefore, for each request, check upfront whether all scatterlist
> buffers are 8-byte aligned and fall back to bounce buffer mode if
> that's not the case.
>
> Signed-off-by: Heiner Kallweit <hkallweit1 at gmail.com>
Thanks, applied for next!
Kind regards
Uffe
> ---
> drivers/mmc/host/meson-gx-mmc.c | 172 ++++++++++++++++++++++++++++++++++++----
> 1 file changed, 156 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
> index eb5ee68d..1842ed34 100644
> --- a/drivers/mmc/host/meson-gx-mmc.c
> +++ b/drivers/mmc/host/meson-gx-mmc.c
> @@ -111,6 +111,11 @@
> #define SD_EMMC_CMD_TIMEOUT 1024 /* in ms */
> #define SD_EMMC_CMD_TIMEOUT_DATA 4096 /* in ms */
> #define SD_EMMC_CFG_CMD_GAP 16 /* in clock cycles */
> +#define SD_EMMC_DESC_BUF_LEN PAGE_SIZE
> +
> +#define SD_EMMC_PRE_REQ_DONE BIT(0)
> +#define SD_EMMC_DESC_CHAIN_MODE BIT(1)
> +
> #define MUX_CLK_NUM_PARENTS 2
>
> struct meson_tuning_params {
> @@ -119,6 +124,13 @@ struct meson_tuning_params {
> u8 rx_phase;
> };
>
> +struct sd_emmc_desc {
> + u32 cmd_cfg;
> + u32 cmd_arg;
> + u32 cmd_data;
> + u32 cmd_resp;
> +};
> +
> struct meson_host {
> struct device *dev;
> struct mmc_host *mmc;
> @@ -137,18 +149,13 @@ struct meson_host {
> unsigned int bounce_buf_size;
> void *bounce_buf;
> dma_addr_t bounce_dma_addr;
> + struct sd_emmc_desc *descs;
> + dma_addr_t descs_dma_addr;
>
> struct meson_tuning_params tp;
> bool vqmmc_enabled;
> };
>
> -struct sd_emmc_desc {
> - u32 cmd_cfg;
> - u32 cmd_arg;
> - u32 cmd_data;
> - u32 cmd_resp;
> -};
> -
> #define CMD_CFG_LENGTH_MASK GENMASK(8, 0)
> #define CMD_CFG_BLOCK_MODE BIT(9)
> #define CMD_CFG_R1B BIT(10)
> @@ -195,6 +202,66 @@ static struct mmc_command *meson_mmc_get_next_command(struct mmc_command *cmd)
> return NULL;
> }
>
> +static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
> + struct mmc_request *mrq)
> +{
> + struct mmc_data *data = mrq->data;
> + struct scatterlist *sg;
> + int i;
> + bool use_desc_chain_mode = true;
> +
> + for_each_sg(data->sg, sg, data->sg_len, i)
> + /* check for 8 byte alignment */
> + if (sg->offset & 7) {
> + WARN_ONCE(1, "unaligned scatterlist buffer\n");
> + use_desc_chain_mode = false;
> + break;
> + }
> +
> + if (use_desc_chain_mode)
> + data->host_cookie |= SD_EMMC_DESC_CHAIN_MODE;
> +}
> +
> +static inline bool meson_mmc_desc_chain_mode(const struct mmc_data *data)
> +{
> + return data->host_cookie & SD_EMMC_DESC_CHAIN_MODE;
> +}
> +
> +static inline bool meson_mmc_bounce_buf_read(const struct mmc_data *data)
> +{
> + return data && data->flags & MMC_DATA_READ &&
> + !meson_mmc_desc_chain_mode(data);
> +}
> +
> +static void meson_mmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
> +{
> + struct mmc_data *data = mrq->data;
> +
> + if (!data)
> + return;
> +
> + meson_mmc_get_transfer_mode(mmc, mrq);
> + data->host_cookie |= SD_EMMC_PRE_REQ_DONE;
> +
> + if (!meson_mmc_desc_chain_mode(data))
> + return;
> +
> + data->sg_count = dma_map_sg(mmc_dev(mmc), data->sg, data->sg_len,
> + mmc_get_dma_dir(data));
> + if (!data->sg_count)
> + dev_err(mmc_dev(mmc), "dma_map_sg failed");
> +}
> +
> +static void meson_mmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
> + int err)
> +{
> + struct mmc_data *data = mrq->data;
> +
> + if (data && meson_mmc_desc_chain_mode(data) && data->sg_count)
> + dma_unmap_sg(mmc_dev(mmc), data->sg, data->sg_len,
> + mmc_get_dma_dir(data));
> +}
> +
> static int meson_mmc_clk_set(struct meson_host *host, unsigned long clk_rate)
> {
> struct mmc_host *mmc = host->mmc;
> @@ -509,6 +576,44 @@ static void meson_mmc_set_response_bits(struct mmc_command *cmd, u32 *cmd_cfg)
> }
> }
>
> +static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg)
> +{
> + struct meson_host *host = mmc_priv(mmc);
> + struct sd_emmc_desc *desc = host->descs;
> + struct mmc_data *data = host->cmd->data;
> + struct scatterlist *sg;
> + u32 start;
> + int i;
> +
> + if (data->flags & MMC_DATA_WRITE)
> + cmd_cfg |= CMD_CFG_DATA_WR;
> +
> + if (data->blocks > 1) {
> + cmd_cfg |= CMD_CFG_BLOCK_MODE;
> + meson_mmc_set_blksz(mmc, data->blksz);
> + }
> +
> + for_each_sg(data->sg, sg, data->sg_count, i) {
> + unsigned int len = sg_dma_len(sg);
> +
> + if (data->blocks > 1)
> + len /= data->blksz;
> +
> + desc[i].cmd_cfg = cmd_cfg;
> + desc[i].cmd_cfg |= FIELD_PREP(CMD_CFG_LENGTH_MASK, len);
> + if (i > 0)
> + desc[i].cmd_cfg |= CMD_CFG_NO_CMD;
> + desc[i].cmd_arg = host->cmd->arg;
> + desc[i].cmd_resp = 0;
> + desc[i].cmd_data = sg_dma_address(sg);
> + }
> + desc[data->sg_count - 1].cmd_cfg |= CMD_CFG_END_OF_CHAIN;
> +
> + dma_wmb(); /* ensure descriptor is written before kicked */
> + start = host->descs_dma_addr | START_DESC_BUSY;
> + writel(start, host->regs + SD_EMMC_START);
> +}
> +
> static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
> {
> struct meson_host *host = mmc_priv(mmc);
> @@ -519,6 +624,8 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
> /* Setup descriptors */
> dma_rmb();
>
> + host->cmd = cmd;
> +
> cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode);
> cmd_cfg |= CMD_CFG_OWNER; /* owned by CPU */
>
> @@ -526,10 +633,16 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
>
> /* data? */
> if (data) {
> + data->bytes_xfered = 0;
> cmd_cfg |= CMD_CFG_DATA_IO;
> cmd_cfg |= FIELD_PREP(CMD_CFG_TIMEOUT_MASK,
> ilog2(meson_mmc_get_timeout_msecs(data)));
>
> + if (meson_mmc_desc_chain_mode(data)) {
> + meson_mmc_desc_chain_transfer(mmc, cmd_cfg);
> + return;
> + }
> +
> if (data->blocks > 1) {
> cmd_cfg |= CMD_CFG_BLOCK_MODE;
> cmd_cfg |= FIELD_PREP(CMD_CFG_LENGTH_MASK,
> @@ -539,7 +652,6 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
> cmd_cfg |= FIELD_PREP(CMD_CFG_LENGTH_MASK, data->blksz);
> }
>
> - data->bytes_xfered = 0;
> xfer_bytes = data->blksz * data->blocks;
> if (data->flags & MMC_DATA_WRITE) {
> cmd_cfg |= CMD_CFG_DATA_WR;
> @@ -555,8 +667,6 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
> ilog2(SD_EMMC_CMD_TIMEOUT));
> }
>
> - host->cmd = cmd;
> -
> /* Last descriptor */
> cmd_cfg |= CMD_CFG_END_OF_CHAIN;
> writel(cmd_cfg, host->regs + SD_EMMC_CMD_CFG);
> @@ -569,14 +679,25 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
> static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> {
> struct meson_host *host = mmc_priv(mmc);
> + bool needs_pre_post_req = mrq->data &&
> + !(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE);
> +
> + if (needs_pre_post_req) {
> + meson_mmc_get_transfer_mode(mmc, mrq);
> + if (!meson_mmc_desc_chain_mode(mrq->data))
> + needs_pre_post_req = false;
> + }
> +
> + if (needs_pre_post_req)
> + meson_mmc_pre_req(mmc, mrq);
>
> /* Stop execution */
> writel(0, host->regs + SD_EMMC_START);
>
> - if (mrq->sbc)
> - meson_mmc_start_cmd(mmc, mrq->sbc);
> - else
> - meson_mmc_start_cmd(mmc, mrq->cmd);
> + meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd);
> +
> + if (needs_pre_post_req)
> + meson_mmc_post_req(mmc, mrq, 0);
> }
>
> static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
> @@ -654,7 +775,9 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
> if (status & (IRQ_END_OF_CHAIN | IRQ_RESP_STATUS)) {
> if (data && !cmd->error)
> data->bytes_xfered = data->blksz * data->blocks;
> - ret = IRQ_WAKE_THREAD;
> + if (meson_mmc_bounce_buf_read(data) ||
> + meson_mmc_get_next_command(cmd))
> + ret = IRQ_WAKE_THREAD;
> } else {
> dev_warn(host->dev, "Unknown IRQ! status=0x%04x: MMC CMD%u arg=0x%08x flags=0x%08x stop=%d\n",
> status, cmd->opcode, cmd->arg,
> @@ -691,7 +814,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
> return IRQ_NONE;
>
> data = cmd->data;
> - if (data && data->flags & MMC_DATA_READ) {
> + if (meson_mmc_bounce_buf_read(data)) {
> xfer_bytes = data->blksz * data->blocks;
> WARN_ON(xfer_bytes > host->bounce_buf_size);
> sg_copy_from_buffer(data->sg, data->sg_len,
> @@ -760,6 +883,8 @@ static const struct mmc_host_ops meson_mmc_ops = {
> .request = meson_mmc_request,
> .set_ios = meson_mmc_set_ios,
> .get_cd = meson_mmc_get_cd,
> + .pre_req = meson_mmc_pre_req,
> + .post_req = meson_mmc_post_req,
> .execute_tuning = meson_mmc_execute_tuning,
> };
>
> @@ -845,6 +970,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
> mmc->caps |= MMC_CAP_CMD23;
> mmc->max_blk_count = CMD_CFG_LENGTH_MASK;
> mmc->max_req_size = mmc->max_blk_count * mmc->max_blk_size;
> + mmc->max_segs = SD_EMMC_DESC_BUF_LEN / sizeof(struct sd_emmc_desc);
> + mmc->max_seg_size = mmc->max_req_size;
>
> /* data bounce buffer */
> host->bounce_buf_size = mmc->max_req_size;
> @@ -857,11 +984,22 @@ static int meson_mmc_probe(struct platform_device *pdev)
> goto err_div_clk;
> }
>
> + host->descs = dma_alloc_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
> + &host->descs_dma_addr, GFP_KERNEL);
> + if (!host->descs) {
> + dev_err(host->dev, "Allocating descriptor DMA buffer failed\n");
> + ret = -ENOMEM;
> + goto err_bounce_buf;
> + }
> +
> mmc->ops = &meson_mmc_ops;
> mmc_add_host(mmc);
>
> return 0;
>
> +err_bounce_buf:
> + dma_free_coherent(host->dev, host->bounce_buf_size,
> + host->bounce_buf, host->bounce_dma_addr);
> err_div_clk:
> clk_disable_unprepare(host->cfg_div_clk);
> err_core_clk:
> @@ -880,6 +1018,8 @@ static int meson_mmc_remove(struct platform_device *pdev)
> /* disable interrupts */
> writel(0, host->regs + SD_EMMC_IRQ_EN);
>
> + dma_free_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
> + host->descs, host->descs_dma_addr);
> dma_free_coherent(host->dev, host->bounce_buf_size,
> host->bounce_buf, host->bounce_dma_addr);
>
> --
> 2.12.2
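
A sizing note on the descriptor buffer: each sd_emmc_desc above is four
32-bit words (16 bytes) and the whole chain lives in one page of coherent
memory, which is where the mmc->max_segs value in the patch comes from.
A quick standalone sketch of the arithmetic (assuming 4 KiB pages; not
driver code):

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_size = 4096;	/* assumed PAGE_SIZE of 4 KiB */
		unsigned int desc_size = 4 * sizeof(unsigned int);	/* 16-byte descriptor */

		/* one descriptor per scatterlist segment fits in a single page */
		printf("max_segs = %u\n", page_size / desc_size);	/* prints 256 */
		return 0;
	}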
>