[PATCH 1/2] nvmet: introduce transport layer keep-alive
Sagi Grimberg
sagi at grimberg.me
Wed Oct 28 03:15:06 EDT 2020
On 10/27/20 5:15 AM, zhenwei pi wrote:
> In the zero-KATO scenario, if the initiator crashes without a transport
> layer disconnection, the target side never reclaims its resources.
>
> A target could start a transport layer keep-alive to detect a dead
> connection for the admin queue.
Not sure why we should worry about kato=0; it's really
more for debug purposes. I'd rather we block this
option from the host altogether.
>
> Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
> ---
> drivers/nvme/target/admin-cmd.c | 2 +-
> drivers/nvme/target/core.c | 14 +++++++++++---
> drivers/nvme/target/nvmet.h | 3 ++-
> 3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index dca34489a1dc..53fbd5c193a1 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -729,7 +729,7 @@ u16 nvmet_set_feat_kato(struct nvmet_req *req)
>
> nvmet_stop_keep_alive_timer(req->sq->ctrl);
> req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
> - nvmet_start_keep_alive_timer(req->sq->ctrl);
> + nvmet_start_keep_alive_timer(req->sq->ctrl, req);
>
> nvmet_set_result(req, req->sq->ctrl->kato);
>
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index 957b39a82431..451192f7ad6a 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -395,10 +395,18 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
> nvmet_ctrl_fatal_error(ctrl);
> }
>
> -void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
> +void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
> {
> - if (unlikely(ctrl->kato == 0))
> + if (unlikely(ctrl->kato == 0)) {
> + if (req->ops->keep_alive)
> + pr_info("ctrl %d starts with transport keep-alive %s\n", ctrl->cntlid,
> + req->ops->keep_alive(req) ? "failed" : "succeed");
> + else
> + pr_info("ctrl %d starts without both NVMeOF and transport keep-alive",
> + ctrl->cntlid);
> +
> return;
> + }
>
> pr_debug("ctrl %d start keep-alive timer for %d secs\n",
> ctrl->cntlid, ctrl->kato);
> @@ -1383,7 +1391,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
> ctrl->err_counter = 0;
> spin_lock_init(&ctrl->error_lock);
>
> - nvmet_start_keep_alive_timer(ctrl);
> + nvmet_start_keep_alive_timer(ctrl, req);
>
> mutex_lock(&subsys->lock);
> list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index 559a15ccc322..de1785ce9fcd 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -305,6 +305,7 @@ struct nvmet_fabrics_ops {
> u16 (*install_queue)(struct nvmet_sq *nvme_sq);
> void (*discovery_chg)(struct nvmet_port *port);
> u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
> + int (*keep_alive)(struct nvmet_req *req);
> };
>
> #define NVMET_MAX_INLINE_BIOVEC 8
> @@ -395,7 +396,7 @@ void nvmet_get_feat_async_event(struct nvmet_req *req);
> u16 nvmet_set_feat_kato(struct nvmet_req *req);
> u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
> void nvmet_execute_async_event(struct nvmet_req *req);
> -void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl);
> +void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl, struct nvmet_req *req);
> void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl);
>
> u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
>
More information about the Linux-nvme
mailing list