[PATCH 2/2] mmc: mmci: stm32: use a buffer for unaligned DMA requests

Yann Gautier yann.gautier at foss.st.com
Fri Mar 25 07:01:45 PDT 2022


On 3/25/22 14:43, Ulf Hansson wrote:
> On Thu, 24 Mar 2022 at 17:23, Yann Gautier <yann.gautier at foss.st.com> wrote:
>>
>> On 3/24/22 12:55, Ulf Hansson wrote:
>>> On Thu, 17 Mar 2022 at 12:19, Yann Gautier <yann.gautier at foss.st.com> wrote:
>>>>
>>>> In SDIO mode, the sg list for requests can be unaligned with what the
>>>> STM32 SDMMC internal DMA can support. In that case, instead of failing,
>>>> use a temporary bounce buffer to copy from/to the sg list.
>>>> This buffer is limited to 1MB, which in turn requires limiting
>>>> max_req_size to 1MB. This limit has not shown any throughput penalties
>>>> for SD-cards or eMMC.
>>>>
>>>> Signed-off-by: Yann Gautier <yann.gautier at foss.st.com>
>>>> ---
>>>>    drivers/mmc/host/mmci_stm32_sdmmc.c | 80 +++++++++++++++++++++++------
>>>>    1 file changed, 63 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
>>>> index 4566d7fc9055..a4414e32800f 100644
>>>> --- a/drivers/mmc/host/mmci_stm32_sdmmc.c
>>>> +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
>>>> @@ -43,6 +43,9 @@ struct sdmmc_lli_desc {
>>>>    struct sdmmc_idma {
>>>>           dma_addr_t sg_dma;
>>>>           void *sg_cpu;
>>>> +       dma_addr_t bounce_dma_addr;
>>>> +       void *bounce_buf;
>>>> +       bool use_bounce_buffer;
>>>>    };
>>>>
>>>>    struct sdmmc_dlyb {
>>>> @@ -54,6 +57,7 @@ struct sdmmc_dlyb {
>>>>    static int sdmmc_idma_validate_data(struct mmci_host *host,
>>>>                                       struct mmc_data *data)
>>>>    {
>>>> +       struct sdmmc_idma *idma = host->dma_priv;
>>>>           struct scatterlist *sg;
>>>>           int i;
>>>>
>>>> @@ -61,21 +65,23 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
>>>>            * idma has constraints on idmabase & idmasize for each element
>>>>            * excepted the last element which has no constraint on idmasize
>>>>            */
>>>> +       idma->use_bounce_buffer = false;
>>>>           for_each_sg(data->sg, sg, data->sg_len - 1, i) {
>>>>                   if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
>>>>                       !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
>>>> -                       dev_err(mmc_dev(host->mmc),
>>>> +                       dev_dbg(mmc_dev(host->mmc),
>>>>                                   "unaligned scatterlist: ofst:%x length:%d\n",
>>>>                                   data->sg->offset, data->sg->length);
>>>> -                       return -EINVAL;
>>>> +                       idma->use_bounce_buffer = true;
>>>> +                       return 0;
>>>>                   }
>>>>           }
>>>>
>>>>           if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
>>>> -               dev_err(mmc_dev(host->mmc),
>>>> +               dev_dbg(mmc_dev(host->mmc),
>>>>                           "unaligned last scatterlist: ofst:%x length:%d\n",
>>>>                           data->sg->offset, data->sg->length);
>>>> -               return -EINVAL;
>>>> +               idma->use_bounce_buffer = true;
>>>>           }
>>>>
>>>>           return 0;
>>>> @@ -84,18 +90,29 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
>>>>    static int _sdmmc_idma_prep_data(struct mmci_host *host,
>>>>                                    struct mmc_data *data)
>>>>    {
>>>> -       int n_elem;
>>>> +       struct sdmmc_idma *idma = host->dma_priv;
>>>>
>>>> -       n_elem = dma_map_sg(mmc_dev(host->mmc),
>>>> -                           data->sg,
>>>> -                           data->sg_len,
>>>> -                           mmc_get_dma_dir(data));
>>>> +       if (idma->use_bounce_buffer) {
>>>> +               if (data->flags & MMC_DATA_WRITE) {
>>>> +                       unsigned int xfer_bytes = data->blksz * data->blocks;
>>>>
>>>> -       if (!n_elem) {
>>>> -               dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
>>>> -               return -EINVAL;
>>>> -       }
>>>> +                       sg_copy_to_buffer(data->sg, data->sg_len,
>>>> +                                         idma->bounce_buf, xfer_bytes);
>>>> +                       dma_wmb();
>>>> +               }
>>>> +       } else {
>>>> +               int n_elem;
>>>> +
>>>> +               n_elem = dma_map_sg(mmc_dev(host->mmc),
>>>> +                                   data->sg,
>>>> +                                   data->sg_len,
>>>> +                                   mmc_get_dma_dir(data));
>>>>
>>>> +               if (!n_elem) {
>>>> +                       dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
>>>> +                       return -EINVAL;
>>>> +               }
>>>> +       }
>>>>           return 0;
>>>>    }
>>>>
>>>> @@ -112,8 +129,19 @@ static int sdmmc_idma_prep_data(struct mmci_host *host,
>>>>    static void sdmmc_idma_unprep_data(struct mmci_host *host,
>>>>                                      struct mmc_data *data, int err)
>>>>    {
>>>> -       dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
>>>> -                    mmc_get_dma_dir(data));
>>>> +       struct sdmmc_idma *idma = host->dma_priv;
>>>> +
>>>> +       if (idma->use_bounce_buffer) {
>>>> +               if (data->flags & MMC_DATA_READ) {
>>>> +                       unsigned int xfer_bytes = data->blksz * data->blocks;
>>>> +
>>>> +                       sg_copy_from_buffer(data->sg, data->sg_len,
>>>> +                                           idma->bounce_buf, xfer_bytes);
>>>> +               }
>>>> +       } else {
>>>> +               dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
>>>> +                            mmc_get_dma_dir(data));
>>>> +       }
>>>>    }
>>>>
>>>>    static int sdmmc_idma_setup(struct mmci_host *host)
>>>> @@ -137,6 +165,16 @@ static int sdmmc_idma_setup(struct mmci_host *host)
>>>>                   host->mmc->max_segs = SDMMC_LLI_BUF_LEN /
>>>>                           sizeof(struct sdmmc_lli_desc);
>>>>                   host->mmc->max_seg_size = host->variant->stm32_idmabsize_mask;
>>>> +
>>>> +               host->mmc->max_req_size = SZ_1M;
>>>> +               idma->bounce_buf = dmam_alloc_coherent(dev,
>>>> +                                                      host->mmc->max_req_size,
>>>> +                                                      &idma->bounce_dma_addr,
>>>> +                                                      GFP_KERNEL);
>>>> +               if (!idma->bounce_buf) {
>>>> +                       dev_err(dev, "Unable to map allocate DMA bounce buffer.\n");
>>>> +                       return -ENOMEM;
>>>
>> Hi Ulf,
>>
>>> If we fail to allocate the 1M bounce buffer, then we end up always
>>> using a PIO based mode, right?
>>>
>>> Perhaps we can allow the above allocation to fail, but then limit us
>>> to use DMA only when the buffers are properly aligned? Would that
>>> work?
>>>
>> We have never supported PIO mode with the STM32 variant.
>> We only support DMA single buffer or DMA LLI.
>> As we cannot have DMA LLI for unaligned access, we'll default to single
>> buffer mode.
> 
> Right, I was looking at the legacy variant, which uses PIO as
> fallback. Sorry for my ignorance.
> 
>> If allocation fails, it then won't work.
> 
> Right, but that's only part of the issue, I think.
> 
>> Maybe we shouldn't fail here, and just check idma->bounce_buf in the
>> validate data function. If the buffer is not allocated, we just return
>> -EINVAL as it was done before.
> 
> Yes, something along those lines. However, there is another problem
> too, which is that the allocation will be done for each instance of
> the host that is probed. In all cases but the SDIO case, this would be
> a waste, right?
> 
> Perhaps we should manage the allocation in the validate function too
> (de-allocation should be handled at ->remove()). In this way, the
> buffer will only be allocated when it's actually needed. Yes, it would
> add a latency while serving the *first* request that has unaligned
> buffers, but I guess we can live with that?
> 
Hi Ulf,

That makes sense, I'll rework the validate data function with this.
I'll push a new version soon.

Thanks,
Yann
>>
>> Best regards,
>> Yann
> 
> Kind regards
> Uffe
> 
>>
>>>> +               }
>>>>           } else {
>>>>                   host->mmc->max_segs = 1;
>>>>                   host->mmc->max_seg_size = host->mmc->max_req_size;
>>>> @@ -154,8 +192,16 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
>>>>           struct scatterlist *sg;
>>>>           int i;
>>>>
>>>> -       if (!host->variant->dma_lli || data->sg_len == 1) {
>>>> -               writel_relaxed(sg_dma_address(data->sg),
>>>> +       if (!host->variant->dma_lli || data->sg_len == 1 ||
>>>> +           idma->use_bounce_buffer) {
>>>> +               u32 dma_addr;
>>>> +
>>>> +               if (idma->use_bounce_buffer)
>>>> +                       dma_addr = idma->bounce_dma_addr;
>>>> +               else
>>>> +                       dma_addr = sg_dma_address(data->sg);
>>>> +
>>>> +               writel_relaxed(dma_addr,
>>>>                                  host->base + MMCI_STM32_IDMABASE0R);
>>>>                   writel_relaxed(MMCI_STM32_IDMAEN,
>>>>                                  host->base + MMCI_STM32_IDMACTRLR);
>>>
>>> Kind regards
>>> Uffe
>>




More information about the linux-arm-kernel mailing list