[PATCH v20 02/12] Add infrastructure for copy offload in block and request layer.
Nitesh Shetty
nj.shetty at samsung.com
Thu May 30 00:16:30 PDT 2024
On 29/05/24 03:41PM, Bart Van Assche wrote:
>On 5/29/24 12:48 AM, Damien Le Moal wrote:
>>On 5/29/24 15:17, Nitesh Shetty wrote:
>>>On 24/05/24 01:33PM, Bart Van Assche wrote:
>>>>On 5/20/24 03:20, Nitesh Shetty wrote:
>>>>>We add two new opcode REQ_OP_COPY_DST, REQ_OP_COPY_SRC.
>>>>>Since copy is a composite operation involving src and dst sectors/lba,
>>>>>each needs to be represented by a separate bio to make it compatible
>>>>>with device mapper.
>>>>>We expect caller to take a plug and send bio with destination information,
>>>>>followed by bio with source information.
>>>>>Once the dst bio arrives we form a request and wait for source
>>>>>bio. Upon arrival of source bio we merge these two bio's and send
>>>>>corresponding request down to device driver.
>>>>>Merging non copy offload bio is avoided by checking for copy specific
>>>>>opcodes in merge function.
>>>>
>>>>In this patch I don't see any changes for blk_attempt_bio_merge(). Does
>>>>this mean that combining REQ_OP_COPY_DST and REQ_OP_COPY_SRC will never
>>>>happen if the QUEUE_FLAG_NOMERGES request queue flag has been set?
>>>>
>>>Yes, in this case copy won't work, as both src and dst bio reach driver
>>>as part of separate requests.
>>>We will add this as part of documentation.
>>
>>So that means that 2 major SAS HBAs which set this flag (megaraid and mpt3sas)
>>will not get support for copy offload ? Not ideal, by far.
>
>QUEUE_FLAG_NOMERGES can also be set through sysfs (see also
>queue_nomerges_store()). This is one of the reasons why using the merge
>infrastructure for combining REQ_OP_COPY_DST and REQ_OP_COPY_SRC is
>unacceptable.
>
Bart, Damien, Hannes,
Thanks for your review.
We tried a slightly modified approach which simplifies this patch and
also avoids merge path.
Also with this, we should be able to solve the QUEUE_FLAG_NOMERGES issue.
Previously we also tried payload/token based approach,
which avoids merge path and tries to combine bios in driver.
But we received feedback that it wasn't the right approach [1].
Do the changes below look any better, or do you have anything else in mind?
[1] https://lore.kernel.org/linux-block/ZIKphgDavKVPREnw@infradead.org/
diff --git a/block/blk-core.c b/block/blk-core.c
index 82c3ae22d76d..7158bac8cc57 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -122,6 +122,8 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_ZEROES),
+ REQ_OP_NAME(COPY_SRC),
+ REQ_OP_NAME(COPY_DST),
REQ_OP_NAME(DRV_IN),
REQ_OP_NAME(DRV_OUT),
};
@@ -839,6 +841,11 @@ void submit_bio_noacct(struct bio *bio)
* requests.
*/
fallthrough;
+ case REQ_OP_COPY_SRC:
+ case REQ_OP_COPY_DST:
+ if (!q->limits.max_copy_sectors)
+ goto not_supported;
+ break;
default:
goto not_supported;
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8534c35e0497..a651e7c114d0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -154,6 +154,20 @@ static struct bio *bio_split_write_zeroes(struct bio *bio,
return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
}
+/*
+ * bio_split_copy - check a copy offload bio against the queue copy limit
+ *
+ * A copy bio describes a single contiguous range, so it always counts
+ * as one physical segment.  Returns NULL when the bio fits within
+ * lim->max_copy_sectors and needs no split, or an error pointer when it
+ * is too large, because splitting a copy bio is not supported.
+ *
+ * NOTE(review): the comment below says the bio is ended with EIO, but
+ * neither this function nor the visible __bio_split_to_limits() caller
+ * (which just returns NULL on IS_ERR) calls bio_io_error()/bio_endio()
+ * on it -- confirm the oversized bio is actually completed somewhere,
+ * otherwise it is leaked/hung.
+ */
+static struct bio *bio_split_copy(struct bio *bio,
+ const struct queue_limits *lim,
+ unsigned int *nsegs)
+{
+ *nsegs = 1;
+ if (bio_sectors(bio) <= lim->max_copy_sectors)
+ return NULL;
+ /*
+ * We don't support splitting for a copy bio. End it with EIO if
+ * splitting is required and return an error pointer.
+ */
+ return ERR_PTR(-EIO);
+}
+
/*
* Return the maximum number of sectors from the start of a bio that may be
* submitted as a single request to a block device. If enough sectors remain,
@@ -362,6 +376,12 @@ struct bio *__bio_split_to_limits(struct bio *bio,
case REQ_OP_WRITE_ZEROES:
split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
break;
+ case REQ_OP_COPY_SRC:
+ case REQ_OP_COPY_DST:
+ split = bio_split_copy(bio, lim, nr_segs);
+ if (IS_ERR(split))
+ return NULL;
+ break;
default:
split = bio_split_rw(bio, lim, nr_segs, bs,
get_max_io_size(bio, lim) << SECTOR_SHIFT);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3b4df8e5ac9e..6d4ffbdade28 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2833,6 +2833,63 @@ static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
blk_mq_commit_rqs(hctx, queued, false);
}
+/*
+ * Copy offload submits a pair of bios under one plug: a REQ_OP_COPY_DST
+ * bio first, followed by a REQ_OP_COPY_SRC bio.  The DST bio forms a
+ * request that waits in the plug list for the SRC bio; once the SRC bio
+ * arrives the two are combined and the request is sent to the driver.
+ *
+ * Return true only for the expected (DST request, SRC bio) pairing.
+ */
+static inline bool blk_copy_offload_combine_ok(struct request *req,
+ struct bio *bio)
+{
+ return (req_op(req) == REQ_OP_COPY_DST &&
+ bio_op(bio) == REQ_OP_COPY_SRC);
+}
+
+/*
+ * blk_copy_offload_combine - append a COPY_SRC bio to a plugged COPY_DST
+ * request, completing the copy pair.
+ *
+ * Returns 0 on success, 1 when the pair does not qualify: wrong opcode
+ * combination, or the source and destination do not describe the same
+ * number of bytes.
+ *
+ * NOTE(review): returns int (0/1) while the only visible caller treats
+ * the result as a boolean -- consider returning bool for clarity.
+ */
+static int blk_copy_offload_combine(struct request *req, struct bio *bio)
+{
+ if (!blk_copy_offload_combine_ok(req, bio))
+ return 1;
+
+ /* src length must match the dst length already in the request */
+ if (req->__data_len != bio->bi_iter.bi_size)
+ return 1;
+
+ req->biotail->bi_next = bio;
+ req->biotail = bio;
+ req->nr_phys_segments++;
+ req->__data_len += bio->bi_iter.bi_size;
+
+ return 0;
+}
+
+/*
+ * blk_copy_offload_attempt_combine - look for a plugged COPY_DST request
+ * on the same queue and try to combine this bio into it.
+ *
+ * Returns true when @bio has been consumed (merged into a plugged
+ * request); the caller must then not submit it separately.  Returns
+ * false when there is no plug, no matching request, or the combine
+ * attempt fails.
+ */
+static inline bool blk_copy_offload_attempt_combine(struct request_queue *q,
+ struct bio *bio)
+{
+ struct blk_plug *plug = current->plug;
+ struct request *rq;
+
+ if (!plug || rq_list_empty(plug->mq_list))
+ return false;
+
+ rq_list_for_each(&plug->mq_list, rq) {
+ if (rq->q == q) {
+ /* first request on this queue: combine or give up */
+ if (!blk_copy_offload_combine(rq, bio))
+ return true;
+ break;
+ }
+
+ /*
+ * Only keep iterating plug list for combines if we have multiple
+ * queues
+ */
+ if (!plug->multiple_queues)
+ break;
+ }
+ return false;
+}
+
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs)
{
@@ -2977,6 +3034,9 @@ void blk_mq_submit_bio(struct bio *bio)
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
goto queue_exit;
+ if (blk_copy_offload_attempt_combine(q, bio))
+ goto queue_exit;
+
if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs))
goto queue_exit;
diff --git a/block/blk.h b/block/blk.h
index 189bc25beb50..112c6736f44c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -323,6 +323,8 @@ static inline bool bio_may_exceed_limits(struct bio *bio,
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_COPY_SRC:
+ case REQ_OP_COPY_DST:
return true; /* non-trivial splitting decisions */
default:
break;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 781c4500491b..22a08425d13e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -342,6 +342,10 @@ enum req_op {
/* reset all the zone present on the device */
REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15,
+ /* copy offload source and destination operations */
+ REQ_OP_COPY_SRC = (__force blk_opf_t)18,
+ REQ_OP_COPY_DST = (__force blk_opf_t)19,
+
/* Driver private requests */
REQ_OP_DRV_IN = (__force blk_opf_t)34,
REQ_OP_DRV_OUT = (__force blk_opf_t)35,
-- 
2.34.1
More information about the Linux-nvme
mailing list