[PATCH 1/5] block: enable batched allocation for blk_mq_alloc_request()

Anuj Gupta anuj1072538 at gmail.com
Wed Sep 28 06:38:19 PDT 2022


On Tue, Sep 27, 2022 at 7:19 AM Jens Axboe <axboe at kernel.dk> wrote:
>
> The filesystem IO path can take advantage of allocating batches of
> requests, if the underlying submitter tells the block layer about it
> through the blk_plug. For passthrough IO, the exported API is the
> blk_mq_alloc_request() helper, and that one does not allow for
> request caching.
>
> Wire up request caching for blk_mq_alloc_request(), which is generally
> done without having a bio available upfront.
>
> Signed-off-by: Jens Axboe <axboe at kernel.dk>
> ---
>  block/blk-mq.c | 80 ++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 71 insertions(+), 9 deletions(-)
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index c11949d66163..d3a9f8b9c7ee 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -510,25 +510,87 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>                                         alloc_time_ns);
>  }
>
> -struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
> -               blk_mq_req_flags_t flags)
> +static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
> +                                           struct blk_plug *plug,
> +                                           blk_opf_t opf,
> +                                           blk_mq_req_flags_t flags)
>  {
>         struct blk_mq_alloc_data data = {
>                 .q              = q,
>                 .flags          = flags,
>                 .cmd_flags      = opf,
> -               .nr_tags        = 1,
> +               .nr_tags        = plug->nr_ios,
> +               .cached_rq      = &plug->cached_rq,
>         };
>         struct request *rq;
> -       int ret;
>
> -       ret = blk_queue_enter(q, flags);
> -       if (ret)
> -               return ERR_PTR(ret);
> +       if (blk_queue_enter(q, flags))
> +               return NULL;
> +
> +       plug->nr_ios = 1;
>
>         rq = __blk_mq_alloc_requests(&data);
> -       if (!rq)
> -               goto out_queue_exit;
> +       if (unlikely(!rq))
> +               blk_queue_exit(q);
> +       return rq;
> +}
> +
> +static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
> +                                                  blk_opf_t opf,
> +                                                  blk_mq_req_flags_t flags)
> +{
> +       struct blk_plug *plug = current->plug;
> +       struct request *rq;
> +
> +       if (!plug)
> +               return NULL;
> +       if (rq_list_empty(plug->cached_rq)) {
> +               if (plug->nr_ios == 1)
> +                       return NULL;
> +               rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
> +               if (rq)
> +                       goto got_it;
> +               return NULL;
> +       }
> +       rq = rq_list_peek(&plug->cached_rq);
> +       if (!rq || rq->q != q)
> +               return NULL;
> +
> +       if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type)
> +               return NULL;
> +       if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
> +               return NULL;
> +
> +       plug->cached_rq = rq_list_next(rq);
> +got_it:
> +       rq->cmd_flags = opf;
> +       INIT_LIST_HEAD(&rq->queuelist);
> +       return rq;
> +}
> +
> +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
> +               blk_mq_req_flags_t flags)
> +{
> +       struct request *rq;
> +
> +       rq = blk_mq_alloc_cached_request(q, opf, flags);
> +       if (!rq) {
> +               struct blk_mq_alloc_data data = {
> +                       .q              = q,
> +                       .flags          = flags,
> +                       .cmd_flags      = opf,
> +                       .nr_tags        = 1,
> +               };
> +               int ret;
> +
> +               ret = blk_queue_enter(q, flags);
> +               if (ret)
> +                       return ERR_PTR(ret);
> +
> +               rq = __blk_mq_alloc_requests(&data);
> +               if (!rq)
> +                       goto out_queue_exit;
> +       }
>         rq->__data_len = 0;
>         rq->__sector = (sector_t) -1;
>         rq->bio = rq->biotail = NULL;
> --
> 2.35.1
>

A large part of this improvement in passthrough performance comes from
enabling request caching. On my setup, performance improves from
2.34 to 2.54 MIOPS. I tested this with the t/io_uring utility (from fio)
on an Intel Optane Gen2 device.

Tested-by: Anuj Gupta <anuj20.g at samsung.com>

--
Anuj Gupta



More information about the Linux-nvme mailing list