[PATCH v2] mmc: mtk-sd: Implement Host Software Queue for eMMC and SD Card

Ulf Hansson ulf.hansson at linaro.org
Tue Oct 8 07:35:57 PDT 2024


On Mon, 30 Sept 2024 at 11:01, AngeloGioacchino Del Regno
<angelogioacchino.delregno at collabora.com> wrote:
>
> Add support for Host Software Queue (HSQ) and enable it when the
> controller instance does not have Command Queue Engine HW support.
>
> It was chosen to enable HSQ only for eMMC and SD/MicroSD cards
> and not for SDIO as performance improvements are seen only for
> the former.
>
> Performance was measured with a SanDisk Extreme Ultra A2 MicroSD
> card in a MediaTek MT8195T Acer Chromebook Spin 513 (CP513-2H),
> by running FIO (bs=4k) on an ArchLinux userspace.
>
> .... Summarizing ....
> Random read:     +24.28% IOPS, +24.29% BW
> Sequential read: +3.14%  IOPS, +3.49%  BW
> Random RW (avg): +50.53% IOPS, +50.68% BW
>
> Below, more data from the benchmarks.
>
> Before:
>  - Random read: IOPS=1643, BW=6574KiB/s
>    bw (  KiB/s): min= 4578, max= 7440, per=99.95%, avg=6571.55, stdev=74.16, samples=953
>    iops        : min= 1144, max= 1860, avg=1642.14, stdev=18.54, samples=953
>    lat (msec)  : 100=0.01%, 250=0.12%, 500=0.38%, 750=97.89%, 1000=1.44%, 2000=0.16%
>  - Sequential read: IOPS=19.1k, BW=74.4MiB/s
>    bw (  KiB/s): min=12288, max=118483, per=100.00%, avg=76293.38, stdev=1971.42, samples=956
>    iops        : min= 3072, max=29620, avg=19072.14, stdev=492.87, samples=956
>    lat (msec)  : 4=0.01%, 10=0.01%, 20=0.21%, 50=23.95%, 100=75.67%, 250=0.05%, 500=0.03%, 750=0.08%
>  - Random R/W: read: IOPS=282, BW=1129KiB/s (1156kB/s)  write: IOPS=284, BW=1136KiB/s
>    read bw (  KiB/s): min=   31, max= 3496, per=100.00%, avg=1703.67, stdev=155.42, samples=630
>    read iops        : min=    7, max=  873, avg=425.22, stdev=38.85, samples=630
>    wri  bw (  KiB/s): min=   31, max= 3443, per=100.00%, avg=1674.27, stdev=164.23, samples=644
>    wri  iops        : min=    7, max=  860, avg=417.87, stdev=41.03, samples=644
>    lat (msec)   : 250=0.13%, 500=0.44%, 750=0.84%, 1000=22.29%, 2000=74.01%, >=2000=2.30%
>
> After:
>  - Random read: IOPS=2042, BW=8171KiB/s
>    bw (  KiB/s): min= 4907, max= 9072, per=99.94%, avg=8166.80, stdev=93.77, samples=954
>    iops        : min= 1226, max= 2268, avg=2040.78, stdev=23.41, samples=954
>    lat (msec)   : 100=0.03%, 250=0.13%, 500=52.88%, 750=46.64%, 1000=0.32%
>  - Sequential read: IOPS=19.7k, BW=77.0MiB/s
>    bw (  KiB/s): min=67980, max=94248, per=100.00%, avg=78894.27, stdev=1475.07, samples=956
>    iops        : min=16994, max=23562, avg=19722.45, stdev=368.76, samples=956
>    lat (msec)   : 4=0.01%, 10=0.01%, 20=0.05%, 50=28.78%, 100=71.14%, 250=0.01%, 500=0.02%
>  - Random R/W: read: IOPS=424, BW=1699KiB/s  write: IOPS=428, BW=1714KiB/s
>    read bw (  KiB/s): min=  228, max= 2856, per=100.00%, avg=1796.60, stdev=112.59, samples=901
>    read iops        : min=   54, max=  712, avg=447.81, stdev=28.21, samples=901
>    wri  bw (  KiB/s): min=   28, max= 2904, per=100.00%, avg=1780.11, stdev=128.27, samples=916
>    wri  iops        : min=    4, max=  724, avg=443.69, stdev=32.14, samples=916
>
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno at collabora.com>

Applied for next, thanks!

Kind regards
Uffe


> ---
>
> Changes in v2:
>  - Added missing `select MMC_HSQ` for MMC_MTK Kconfig
>
>  drivers/mmc/host/Kconfig  |  1 +
>  drivers/mmc/host/mtk-sd.c | 49 +++++++++++++++++++++++++++++++++++++--
>  2 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
> index 7199cb0bd0b9..0ba5a9f769fb 100644
> --- a/drivers/mmc/host/Kconfig
> +++ b/drivers/mmc/host/Kconfig
> @@ -1009,6 +1009,7 @@ config MMC_MTK
>         depends on COMMON_CLK
>         select REGULATOR
>         select MMC_CQHCI
> +       select MMC_HSQ
>         help
>           This selects the MediaTek(R) Secure digital and Multimedia card Interface.
>           If you have a machine with a integrated SD/MMC card reader, say Y or M here.
> diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> index 5165a33bf74b..a9a554bd3f44 100644
> --- a/drivers/mmc/host/mtk-sd.c
> +++ b/drivers/mmc/host/mtk-sd.c
> @@ -33,6 +33,7 @@
>  #include <linux/mmc/slot-gpio.h>
>
>  #include "cqhci.h"
> +#include "mmc_hsq.h"
>
>  #define MAX_BD_NUM          1024
>  #define MSDC_NR_CLOCKS      3
> @@ -473,6 +474,7 @@ struct msdc_host {
>         bool hs400_tuning;      /* hs400 mode online tuning */
>         bool internal_cd;       /* Use internal card-detect logic */
>         bool cqhci;             /* support eMMC hw cmdq */
> +       bool hsq_en;            /* Host Software Queue is enabled */
>         struct msdc_save_para save_para; /* used when gate HCLK */
>         struct msdc_tune_para def_tune_para; /* default tune setting */
>         struct msdc_tune_para saved_tune_para; /* tune result of CMD21/CMD19 */
> @@ -1170,7 +1172,9 @@ static void msdc_track_cmd_data(struct msdc_host *host, struct mmc_command *cmd)
>
>  static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
>  {
> +       struct mmc_host *mmc = mmc_from_priv(host);
>         unsigned long flags;
> +       bool hsq_req_done;
>
>         /*
>          * No need check the return value of cancel_delayed_work, as only ONE
> @@ -1178,6 +1182,27 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
>          */
>         cancel_delayed_work(&host->req_timeout);
>
> +       /*
> +        * If the request was handled from Host Software Queue, there's almost
> +        * nothing to do here, and we also don't need to reset mrq as any race
> +        * condition would not have any room to happen, since HSQ stores the
> +        * "scheduled" mrqs in an internal array of mrq slots anyway.
> +        * However, if the controller experienced an error, we still want to
> +        * reset it as soon as possible.
> +        *
> +        * Note that non-HSQ requests will still be happening at times, even
> +        * though it is enabled, and that's what is going to reset host->mrq.
> +        * Also, msdc_unprepare_data() is going to be called by HSQ when needed
> +        * as HSQ request finalization will eventually call the .post_req()
> +        * callback of this driver which, in turn, unprepares the data.
> +        */
> +       hsq_req_done = host->hsq_en ? mmc_hsq_finalize_request(mmc, mrq) : false;
> +       if (hsq_req_done) {
> +               if (host->error)
> +                       msdc_reset_hw(host);
> +               return;
> +       }
> +
>         spin_lock_irqsave(&host->lock, flags);
>         host->mrq = NULL;
>         spin_unlock_irqrestore(&host->lock, flags);
> @@ -1187,7 +1212,7 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
>                 msdc_unprepare_data(host, mrq->data);
>         if (host->error)
>                 msdc_reset_hw(host);
> -       mmc_request_done(mmc_from_priv(host), mrq);
> +       mmc_request_done(mmc, mrq);
>         if (host->dev_comp->recheck_sdio_irq)
>                 msdc_recheck_sdio_irq(host);
>  }
> @@ -1347,7 +1372,7 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq)
>         struct msdc_host *host = mmc_priv(mmc);
>
>         host->error = 0;
> -       WARN_ON(host->mrq);
> +       WARN_ON(!host->hsq_en && host->mrq);
>         host->mrq = mrq;
>
>         if (mrq->data)
> @@ -2916,6 +2941,19 @@ static int msdc_drv_probe(struct platform_device *pdev)
>                 mmc->max_seg_size = 64 * 1024;
>                 /* Reduce CIT to 0x40 that corresponds to 2.35us */
>                 msdc_cqe_cit_cal(host, 2350);
> +       } else if (mmc->caps2 & MMC_CAP2_NO_SDIO) {
> +               /* Use HSQ on eMMC/SD (but not on SDIO) if HW CQE not supported */
> +               struct mmc_hsq *hsq = devm_kzalloc(&pdev->dev, sizeof(*hsq), GFP_KERNEL);
> +               if (!hsq) {
> +                       ret = -ENOMEM;
> +                       goto release;
> +               }
> +
> +               ret = mmc_hsq_init(hsq, mmc);
> +               if (ret)
> +                       goto release;
> +
> +               host->hsq_en = true;
>         }
>
>         ret = devm_request_irq(&pdev->dev, host->irq, msdc_irq,
> @@ -3043,6 +3081,9 @@ static int __maybe_unused msdc_runtime_suspend(struct device *dev)
>         struct mmc_host *mmc = dev_get_drvdata(dev);
>         struct msdc_host *host = mmc_priv(mmc);
>
> +       if (host->hsq_en)
> +               mmc_hsq_suspend(mmc);
> +
>         msdc_save_reg(host);
>
>         if (sdio_irq_claimed(mmc)) {
> @@ -3073,6 +3114,10 @@ static int __maybe_unused msdc_runtime_resume(struct device *dev)
>                 pinctrl_select_state(host->pinctrl, host->pins_uhs);
>                 enable_irq(host->irq);
>         }
> +
> +       if (host->hsq_en)
> +               mmc_hsq_resume(mmc);
> +
>         return 0;
>  }
>
> --
> 2.46.1
>



More information about the linux-arm-kernel mailing list