[PATCH] Add support for reservations to NVMeoF target
Sagi Grimberg
sagi at grimberg.me
Sun Aug 27 23:00:20 PDT 2017
Hi Omri,
First, please follow the format of patch submission as described
in: Documentation/process/submitting-patches.rst
Second, I would love to see a generic reservations lib so nvmet
and LIO can share the same abstraction (with proper callouts
in place). But I don't think it's a gating issue.
Adding Mike to the thread.
Third, persistent reservations should survive power loss, which
means they need to be logged in a file at least.
As for the implementation itself, any reason to choose a rw_lock
instead of an RCU protection scheme? Up until now we got the
IO path to be lock free and I'm not very enthusiastic about starting now.
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index a53bb66..9695af7 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -156,6 +156,30 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
> * still claim to fully implement this mandatory log page.
> */
> break;
> + case NVME_LOG_RESERVATION:
> + if (data_len < 16) {
> + status = NVME_SC_INTERNAL;
> + goto err;
> + }
> + mutex_lock(&req->sq->ctrl->lock);
> + req->sq->ctrl->rsrv_log_async_sent = false;
> + if (!list_empty(&req->sq->ctrl->rsrv_log)) {
> + struct nvmet_rsrv_log *p;
> +
> + list_for_each_entry(p, &req->sq->ctrl->rsrv_log, link) {
> + if (++((u8 *)buf)[9] == 255)
> + break;
> + }
> + p = list_first_entry(&req->sq->ctrl->rsrv_log,
> + struct nvmet_rsrv_log, link);
> + ++req->sq->ctrl->rsrv_log_counter;
> + *(__le64 *)buf =
> + cpu_to_le64(&req->sq->ctrl->rsrv_log_counter);
> + ((u8 *)buf)[8] = p->log_type;
> + *(__le32 *)(buf+12) = cpu_to_le32(p->nsid);
I suggest that buf should be cast to a generic reservation log page
event structure (defined in include/linux/nvme.h) instead of this buffer
manipulation. And please move the handling to a dedicated routine.
> + }
> + mutex_unlock(&req->sq->ctrl->lock);
> + break;
> default:
> BUG();
> }
> @@ -461,6 +485,15 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
> req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
> nvmet_set_result(req, req->sq->ctrl->kato);
> break;
> + case NVME_FEAT_HOST_ID:
> + status = nvmet_copy_from_sgl(req, 0, &req->sq->ctrl->host_id,
> + 8);
> + break;
Can you split the host_id part to a dedicated patch?
> +void add_reservation_log_page(struct nvmet_ctrl *ctrl,
> + struct nvmet_ns *ns, int type)
> +{
> + struct nvmet_rsrv_log *p;
> +
> + if (ctrl->rsrv_mask & (1 << type))
> + return;
> + p = kzalloc(sizeof(*p), GFP_KERNEL);
> + if (p == NULL)
> + return;
> + p->nsid = ns->nsid;
> + p->log_type = type;
> + mutex_lock(&ctrl->lock);
> + list_add_tail(&p->link, &ctrl->rsrv_log);
> + mutex_unlock(&ctrl->lock);
> + if (!ctrl->rsrv_log_async_sent) {
> + ctrl->rsrv_log_async_sent = true;
> + nvmet_add_async_event(ctrl, 6, 0, NVME_LOG_RESERVATION);
> + }
> +}
It would be good if you directly fill a reservation log page structure
itself here.
> +
> int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
> {
> int ret = 0;
> @@ -377,6 +398,8 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
>
> INIT_LIST_HEAD(&ns->dev_link);
> init_completion(&ns->disable_done);
> + INIT_LIST_HEAD(&ns->rsrv_list);
> + rwlock_init(&ns->rsrv_lock);
>
> ns->nsid = nsid;
> ns->subsys = subsys;
> @@ -846,6 +869,7 @@ static void nvmet_ctrl_free(struct kref *ref)
> nvmet_stop_keep_alive_timer(ctrl);
>
> mutex_lock(&subsys->lock);
> + nvmet_rsrv_remove_ctrl(ctrl);
> list_del(&ctrl->subsys_entry);
> mutex_unlock(&subsys->lock);
>
> diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
> index 3b4d47a..4eb4182 100644
> --- a/drivers/nvme/target/io-cmd.c
> +++ b/drivers/nvme/target/io-cmd.c
> @@ -16,6 +16,8 @@
> #include <linux/module.h>
> #include "nvmet.h"
>
> +#define IEKEY 0x8 /* Ignore Existing Key */
> +
> static void nvmet_bio_done(struct bio *bio)
> {
> struct nvmet_req *req = bio->bi_private;
> @@ -40,6 +42,57 @@ static void nvmet_inline_bio_init(struct nvmet_req *req)
> bio_init(bio, req->inline_bvec, NVMET_MAX_INLINE_BIOVEC);
> }
>
> +static inline bool nvmet_ctrl_same_host(struct nvmet_ctrl *ctrl1,
> + struct nvmet_ctrl *ctrl2)
> +{
> + if (ctrl1 == NULL || ctrl2 == NULL)
> + return false;
> + if (ctrl1 == ctrl2)
> + return true;
> + if (ctrl1->host_id != 0 && ctrl1->host_id == ctrl2->host_id)
> + return true;
> + return false;
> +}
> +
> +static struct nvmet_rsrv *nvmet_find_reservation(struct nvmet_req *req)
> +{
> + struct nvmet_rsrv *p;
> +
> + list_for_each_entry(p, &req->ns->rsrv_list, link) {
> + if (nvmet_ctrl_same_host(p->ctrl, req->sq->ctrl))
> + return p;
> + }
> + return NULL;
> +}
> +
> +static bool nvmet_check_reservation(struct nvmet_req *req)
> +{
> + bool success = true;
> +
> + read_lock(&req->ns->rsrv_lock);
> + switch (req->ns->rsrv_type) {
> + case PR_WRITE_EXCLUSIVE:
> + if (req->cmd->rw.opcode == nvme_cmd_read)
> + goto out;
> + case PR_EXCLUSIVE_ACCESS:
> + success = nvmet_ctrl_same_host(req->ns->rsrv_ctrl,
> + req->sq->ctrl);
> + goto out;
> + case PR_WRITE_EXCLUSIVE_REG_ONLY:
> + case PR_WRITE_EXCLUSIVE_ALL_REGS:
> + if (req->cmd->rw.opcode == nvme_cmd_read)
> + goto out;
> + case PR_EXCLUSIVE_ACCESS_REG_ONLY:
> + case PR_EXCLUSIVE_ACCESS_ALL_REGS:
> + success = (nvmet_find_reservation(req) != NULL);
> + default:
> + goto out;
> + }
> +out:
> + read_unlock(&req->ns->rsrv_lock);
> + return success;
> +}
> +
> static void nvmet_execute_rw(struct nvmet_req *req)
> {
> int sg_cnt = req->sg_cnt;
> @@ -196,6 +249,296 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
> }
> }
>
> +static void nvmet_execute_resv_register(struct nvmet_req *req)
> +{
> + struct nvmet_rsrv *p;
> + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
> + u16 status = NVME_SC_SUCCESS;
> + struct { u64 crkey, nrkey; } keys;
> +
> + write_lock(&req->ns->rsrv_lock);
> + status = nvmet_copy_from_sgl(req, 0, &keys, sizeof(keys));
> + if (status)
> + goto out;
> + p = nvmet_find_reservation(req);
> + switch (cdw10 & 0x7) {
You should add a macro to extract the reservation action
directly from nvme_command.
> + case 0: /* Register */
I think that meaningful enumeration is better than comments.
> + if (p == NULL) {
> + p = kzalloc(sizeof(*p), GFP_KERNEL);
Allocation under write_lock?
> +
> +static void nvmet_execute_resv_report(struct nvmet_req *req)
> +{
> + union {
> + struct {
> + __le32 gen;
> + u8 rtype;
> + __le16 regctls;
> + u16 rsvd2;
> + u8 ptpls;
> + u8 rsvd14[14];
> + } __packed hdr;
> + struct {
> + __le16 cntlid;
> + u8 rcsts;
> + u8 rsvd5[5];
> + __le64 hostid;
> + __le64 rkey;
> + } __packed cntrl;
These are generic nvme structures, so they should live in
nvme protocol header.
More information about the Linux-nvme
mailing list