[PATCH 5/7] nvmet: add support for fused operations
Dmitry Bogdanov
d.bogdanov at yadro.com
Wed Sep 11 23:42:57 PDT 2024
Add support for the Compare and Write fused operation.
Several different fused pairs can be in flight at the same time.
Atomicity is guaranteed for a single logical block only.
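For illustration only (not part of this patch), the pair that the target
accepts has roughly the following shape on the host side: a Compare
command flagged NVME_CMD_FUSE_FIRST, immediately followed by a Write
command flagged NVME_CMD_FUSE_SECOND, both addressing the same SLBA and
covering exactly one logical block (the zero-based NLB field set to 0).
The helper name below is hypothetical and the data-pointer setup is
omitted for brevity:

    /* hypothetical helper; dptr/PRP setup omitted for brevity */
    static void fill_fused_caw(struct nvme_command *cmp,
                               struct nvme_command *wr,
                               __le32 nsid, __le64 slba)
    {
            memset(cmp, 0, sizeof(*cmp));
            cmp->rw.opcode = nvme_cmd_compare;
            cmp->rw.flags  = NVME_CMD_FUSE_FIRST;
            cmp->rw.nsid   = nsid;
            cmp->rw.slba   = slba;
            cmp->rw.length = cpu_to_le16(0); /* zero-based: one logical block */

            memset(wr, 0, sizeof(*wr));
            wr->rw.opcode  = nvme_cmd_write;
            wr->rw.flags   = NVME_CMD_FUSE_SECOND;
            wr->rw.nsid    = nsid;
            wr->rw.slba    = slba;           /* must match the first command */
            wr->rw.length  = cpu_to_le16(0);
    }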
Signed-off-by: Dmitry Bogdanov <d.bogdanov at yadro.com>
---
drivers/nvme/target/admin-cmd.c | 2 +
drivers/nvme/target/core.c | 326 +++++++++++++++++++++++++++++++-
drivers/nvme/target/fc.c | 2 +-
drivers/nvme/target/nvmet.h | 12 +-
include/linux/nvme.h | 2 +
5 files changed, 338 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index e555b9efebfb..b1ca1e371212 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -437,6 +437,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
NVME_CTRL_ONCS_WRITE_ZEROES |
NVME_CTRL_ONCS_COMPARE);
+ id->fuses = cpu_to_le16(NVME_CTRL_FUSES_COMPARE_AND_WRITE);
+
/* XXX: don't report vwc if the underlying device is write through */
id->vwc = NVME_CTRL_VWC_PRESENT;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b7b2bf7b460c..b4fb66037e45 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -804,9 +804,98 @@ int nvmet_compare_sg(struct nvmet_req *req)
return ret;
}
+static void nvmet_fused_failed(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, fused_work);
+
+ nvmet_req_complete(req, NVME_SC_FUSED_FAIL);
+}
+
+static void nvmet_fused_succeeded(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, fused_work);
+
+ req->execute(req);
+}
+
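+/*
+ * Called when one command of a fused pair completes and its partner is
+ * known: either fail the partner, queue it for execution, or, once both
+ * commands have completed, send both responses.
+ */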
+static void __nvmet_fused_req_complete(struct nvmet_req *req,
+ struct nvmet_req *fused_req,
+ u16 status)
+{
+ struct nvmet_req *first_req, *second_req;
+ unsigned long flags;
+
+ if (req->cmd->common.flags & NVME_CMD_FUSE_FIRST) {
+ first_req = req;
+ second_req = fused_req;
+ } else {
+ first_req = fused_req;
+ second_req = req;
+ }
+
+ spin_lock_irqsave(&first_req->fused_lock, flags);
+ spin_lock(&second_req->fused_lock);
+
+ req->fused_state = NVMET_FUSED_STATE_COMPLETED;
+
+ switch (fused_req->fused_state) {
+ case NVMET_FUSED_STATE_INIT:
+ fallthrough;
+ case NVMET_FUSED_STATE_FAILED:
+ /* If the second command is not executing yet, the first one
+ * failed, so fail the second one as well.
+ */
+ WARN_ON(!status);
+ fused_req->fused_state = NVMET_FUSED_STATE_FAILED;
+ /* wait for the fused request to complete or execute */
+ spin_unlock(&second_req->fused_lock);
+ spin_unlock_irqrestore(&first_req->fused_lock, flags);
+ return;
+ case NVMET_FUSED_STATE_EXECUTING:
+ /* The second fused command is waiting for the first one to
+ * complete. Continue handling the second command according to
+ * the first one's completion status.
+ */
+ if (status)
+ INIT_WORK(&fused_req->fused_work, nvmet_fused_failed);
+ else
+ INIT_WORK(&fused_req->fused_work, nvmet_fused_succeeded);
+
+ queue_work(nvmet_wq, &fused_req->fused_work);
+
+ spin_unlock(&second_req->fused_lock);
+ spin_unlock_irqrestore(&first_req->fused_lock, flags);
+
+ return;
+ case NVMET_FUSED_STATE_COMPLETED:
+ /* both fused commands completed - send the responses */
+ spin_unlock(&second_req->fused_lock);
+ spin_unlock_irqrestore(&first_req->fused_lock, flags);
+
+ if (first_req->ns)
+ nvmet_put_namespace(first_req->ns);
+ first_req->ops->queue_response(first_req);
+
+ if (second_req->ns)
+ nvmet_put_namespace(second_req->ns);
+ second_req->ops->queue_response(second_req);
+
+ return;
+ }
+ /* must never get here */
+ WARN_ON(1);
+ spin_unlock(&second_req->fused_lock);
+ spin_unlock_irqrestore(&first_req->fused_lock, flags);
+
+ if (req->ns)
+ nvmet_put_namespace(req->ns);
+ req->ops->queue_response(req);
+}
+
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
struct nvmet_ns *ns = req->ns;
+ unsigned long flags;
if (!req->sq->sqhd_disabled)
nvmet_update_sq_head(req);
@@ -818,6 +907,62 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
trace_nvmet_req_complete(req);
+ if (unlikely(req->cmd->common.flags & NVME_CMD_FUSE_FIRST)) {
+ struct nvmet_req *fused_req = NULL;
+
+ spin_lock_irqsave(&req->sq->fused_lock, flags);
+ if (req->sq->first_fused_req == req) {
+ req->sq->first_fused_req = NULL;
+ spin_unlock_irqrestore(&req->sq->fused_lock, flags);
+ /* There is no second fused command yet, so just complete
+ * this one. The second command will fail with the MISSING
+ * status code because sq->first_fused_req is NULL.
+ */
+ goto queue_resp;
+ }
+ /* This command is not a first fused command waiting for its
+ * second: either the second one has arrived, or there was no
+ * second one and this one has failed.
+ * req->fused_pair is safe to read without the spinlock, because
+ * sq->first_fused_req != req and req->fused_pair will no longer
+ * be set.
+ */
+ fused_req = req->fused_pair;
+ if (!fused_req) {
+ spin_unlock_irqrestore(&req->sq->fused_lock, flags);
+ /* There is no second fused command yet, so just complete
+ * this one.
+ */
+ goto queue_resp;
+ }
+ spin_unlock_irqrestore(&req->sq->fused_lock, flags);
+
+ /* The second fused command has arrived */
+
+ __nvmet_fused_req_complete(req, fused_req, status);
+
+ return;
+ } else if (unlikely(req->cmd->common.flags & NVME_CMD_FUSE_SECOND)) {
+ struct nvmet_req *fused_req = NULL;
+
+ spin_lock_irqsave(&req->fused_lock, flags);
+ if (!req->fused_pair) {
+ /* There is no first fused command, so just complete
+ * this one.
+ */
+ spin_unlock_irqrestore(&req->fused_lock, flags);
+ goto queue_resp;
+ }
+ fused_req = req->fused_pair;
+ req->fused_state = NVMET_FUSED_STATE_COMPLETED;
+ spin_unlock_irqrestore(&req->fused_lock, flags);
+
+ __nvmet_fused_req_complete(req, fused_req, status);
+
+ /* unlock LBA */
+ return;
+ }
+
+queue_resp:
req->ops->queue_response(req);
if (ns)
nvmet_put_namespace(ns);
@@ -866,6 +1011,11 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
*/
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
nvmet_async_events_failall(ctrl);
+
+ /* Complete the first fused command waiting for the second one */
+ if (sq->first_fused_req)
+ nvmet_req_complete(sq->first_fused_req, NVME_SC_FUSED_MISSING);
+
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
@@ -915,6 +1065,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
init_completion(&sq->free_done);
init_completion(&sq->confirm_done);
nvmet_auth_sq_init(sq);
+ spin_lock_init(&sq->fused_lock);
return 0;
}
@@ -950,6 +1101,107 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
return 0;
}
+/* Fail the first fused command that does not have a fused pair */
+static void nvmet_fail_first_fused(struct nvmet_req *req)
+{
+ spin_lock(&req->fused_lock);
+ if (req->fused_state == NVMET_FUSED_STATE_INIT) {
+ /* the command is still collecting data, just set the state */
+ req->fused_state = NVMET_FUSED_STATE_FAILED;
+ spin_unlock(&req->fused_lock);
+ return;
+ } else if (req->fused_state == NVMET_FUSED_STATE_EXECUTING) {
+ req->fused_state = NVMET_FUSED_STATE_FAILED;
+ spin_unlock(&req->fused_lock);
+ nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+ return;
+ }
+ spin_unlock(&req->fused_lock);
+}
+
+/* This function is always called from the same thread, but
+ * req->sq->first_fused_req and req->fused_* are accessed from different
+ * threads. Therefore locks are used to synchronize access to them.
+ */
+static u16 nvmet_cmd_check_fused(struct nvmet_req *req)
+{
+ struct nvmet_req *first_req;
+
+ if (likely(!(req->cmd->common.flags & NVME_CMD_FUSE_MASK))) {
+ if (unlikely(req->sq->first_fused_req)) {
+ spin_lock(&req->sq->fused_lock);
+ first_req = req->sq->first_fused_req;
+ if (req->sq->first_fused_req) {
+ req->sq->first_fused_req = NULL;
+ spin_unlock(&req->sq->fused_lock);
+ nvmet_fail_first_fused(first_req);
+ } else {
+ spin_unlock(&req->sq->fused_lock);
+ }
+ }
+ return 0;
+ } else if ((req->cmd->common.flags & NVME_CMD_FUSE_MASK) == NVME_CMD_FUSE_MASK) {
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ }
+
+ spin_lock(&req->sq->fused_lock);
+ first_req = req->sq->first_fused_req;
+ if (req->cmd->common.flags & NVME_CMD_FUSE_FIRST) {
+ if (first_req) {
+ req->sq->first_fused_req = NULL;
+ spin_unlock(&req->sq->fused_lock);
+
+ nvmet_fail_first_fused(first_req);
+
+ spin_lock(&req->sq->fused_lock);
+ }
+
+ if (req->cmd->common.opcode != nvme_cmd_compare) {
+ spin_unlock(&req->sq->fused_lock);
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ }
+
+ /* only one logical block is supported for Compare And Write */
+ if (req->cmd->rw.length > 0) {
+ spin_unlock(&req->sq->fused_lock);
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ }
+
+ req->sq->first_fused_req = req;
+ } else if (unlikely(req->cmd->common.flags & NVME_CMD_FUSE_SECOND)) {
+ if (!first_req) {
+ spin_unlock(&req->sq->fused_lock);
+ return NVME_SC_FUSED_MISSING;
+ }
+
+ if (req->cmd->common.opcode != nvme_cmd_write) {
+ req->sq->first_fused_req = NULL;
+ spin_unlock(&req->sq->fused_lock);
+
+ nvmet_fail_first_fused(first_req);
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ }
+
+ if (req->cmd->rw.length > 0 ||
+ req->cmd->rw.slba != first_req->cmd->rw.slba) {
+ req->sq->first_fused_req = NULL;
+ spin_unlock(&req->sq->fused_lock);
+
+ nvmet_fail_first_fused(first_req);
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ }
+
+ req->fused_pair = first_req;
+ first_req->fused_pair = req;
+ req->sq->first_fused_req = NULL;
+ }
+
+ req->fused_state = NVMET_FUSED_STATE_INIT;
+ spin_unlock(&req->sq->fused_lock);
+
+ return 0;
+}
+
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
@@ -1019,11 +1272,14 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->ns = NULL;
req->error_loc = NVMET_NO_ERROR_LOC;
req->error_slba = 0;
+ req->fused_pair = NULL;
+
+ spin_lock_init(&req->fused_lock);
- /* no support for fused commands yet */
- if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
+
+ status = nvmet_cmd_check_fused(req);
+ if (unlikely(status)) {
req->error_loc = offsetof(struct nvme_common_command, flags);
- status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -1100,7 +1356,69 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
void nvmet_req_execute(struct nvmet_req *req)
{
- req->execute(req);
+ if (likely(!(req->cmd->common.flags & NVME_CMD_FUSE_MASK))) {
+ req->execute(req);
+ return;
+ }
+
+ if (req->cmd->common.flags & NVME_CMD_FUSE_FIRST) {
+ spin_lock(&req->fused_lock);
+ if (req->fused_state == NVMET_FUSED_STATE_FAILED) {
+ spin_unlock(&req->fused_lock);
+ nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+ return;
+ }
+
+ req->fused_state = NVMET_FUSED_STATE_EXECUTING;
+
+ if (req->fused_pair) {
+ spin_lock(&req->fused_pair->fused_lock);
+ if (req->fused_pair->fused_state == NVMET_FUSED_STATE_EXECUTING) {
+ spin_unlock(&req->fused_pair->fused_lock);
+ spin_unlock(&req->fused_lock);
+ req->execute(req);
+ return;
+ }
+ spin_unlock(&req->fused_pair->fused_lock);
+ }
+ spin_unlock(&req->fused_lock);
+ /* wait for both fused commands to be ready to execute */
+ return;
+ } else if (req->cmd->common.flags & NVME_CMD_FUSE_SECOND) {
+ struct nvmet_req *first_req = NULL;
+
+ spin_lock(&req->fused_lock);
+ if (!req->fused_pair) {
+ spin_unlock(&req->fused_lock);
+ nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+ return;
+ }
+
+ if (req->fused_state == NVMET_FUSED_STATE_FAILED) {
+ spin_unlock(&req->fused_lock);
+ nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+ return;
+ }
+ first_req = req->fused_pair;
+ spin_unlock(&req->fused_lock);
+
+ /* take locks in the same order */
+ spin_lock(&first_req->fused_lock);
+ spin_lock(&req->fused_lock);
+ req->fused_state = NVMET_FUSED_STATE_EXECUTING;
+
+ if (first_req->fused_state == NVMET_FUSED_STATE_EXECUTING) {
+ spin_unlock(&req->fused_lock);
+ spin_unlock(&first_req->fused_lock);
+ /* both fused commands are ready to execute */
+ first_req->execute(first_req);
+ return;
+ }
+
+ spin_unlock(&req->fused_lock);
+ spin_unlock(&first_req->fused_lock);
+ /* wait for both fused commands to be ready to execute */
+ }
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index a77f0e05c174..804c2d0be6bc 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2186,7 +2186,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
nvme_is_fabrics((struct nvme_command *) sqe) ||
xfr_length != fod->req.transfer_len ||
(le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] ||
- (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) ||
+ (sqe->flags & NVME_CMD_FUSE_MASK) ||
queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head)))
send_ersp = true;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 41e4eec9a251..1278b81c85a7 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -102,6 +102,7 @@ struct nvmet_cq {
u16 size;
};
+struct nvmet_req;
struct nvmet_sq {
struct nvmet_ctrl *ctrl;
struct percpu_ref ref;
@@ -124,6 +125,8 @@ struct nvmet_sq {
#endif
struct completion free_done;
struct completion confirm_done;
+ struct nvmet_req *first_fused_req;
+ spinlock_t fused_lock;
};
struct nvmet_ana_group {
@@ -340,7 +343,6 @@ struct nvmet_subsys_link {
struct nvmet_subsys *subsys;
};
-struct nvmet_req;
struct nvmet_fabrics_ops {
struct module *owner;
unsigned int type;
@@ -374,6 +376,14 @@ struct nvmet_req {
struct scatterlist *sg;
struct scatterlist *metadata_sg;
struct scatterlist *cmp_sg;
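+ /*
+ * Fused (Compare and Write) pairing state: fused_state moves from
+ * INIT to EXECUTING to COMPLETED, or to FAILED when the pair is
+ * broken up; fused_lock protects the transitions.
+ */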
+ struct nvmet_req *fused_pair;
+ struct work_struct fused_work;
+ spinlock_t fused_lock;
+ int fused_state;
+#define NVMET_FUSED_STATE_INIT 0
+#define NVMET_FUSED_STATE_EXECUTING 1
+#define NVMET_FUSED_STATE_COMPLETED 2
+#define NVMET_FUSED_STATE_FAILED 3
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b58d9405d65e..af6fbff74dcc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -374,6 +374,7 @@ enum {
NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_ONCS_RESERVATIONS = 1 << 5,
NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
+ NVME_CTRL_FUSES_COMPARE_AND_WRITE = 1 << 0,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3,
@@ -949,6 +950,7 @@ union nvme_data_ptr {
enum {
NVME_CMD_FUSE_FIRST = (1 << 0),
NVME_CMD_FUSE_SECOND = (1 << 1),
+ NVME_CMD_FUSE_MASK = NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND,
NVME_CMD_SGL_METABUF = (1 << 6),
NVME_CMD_SGL_METASEG = (1 << 7),
--
2.25.1