[External] Re: [PATCH 1/2] nvmet: introduce transport layer keep-alive
zhenwei pi
pizhenwei at bytedance.com
Wed Oct 28 05:29:00 EDT 2020
On 10/28/20 3:15 PM, Sagi Grimberg wrote:
>
> On 10/27/20 5:15 AM, zhenwei pi wrote:
>> In the zero-KATO scenario, if the initiator crashes without a
>> transport layer disconnection, the target side would never reclaim
>> resources.
>>
>> A target could start transport layer keep-alive to detect a dead
>> connection on the admin queue.
>
> Not sure why we should worry about kato=0, it's really
> more for debug purposes. I'd rather that we block this
> option from the host altogether.
>
A target can't assume that the initiator always runs as expected.
I'm developing a user-level NVMeOF library. In my plan, a user-level
KATO timer gets a little complex, so I'd like to use TCP-level
keep-alive to keep the code simple. The target side therefore needs
to handle the zero-KATO case.
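
For example, the library can arm kernel TCP keep-alive on the
admin-queue socket instead of running its own KATO timer. A minimal
user-space sketch (enable_tcp_keepalive is a made-up helper name, the
probe values are illustrative, and the TCP_KEEP* options are
Linux-specific):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Enable TCP keep-alive on a connected NVMe/TCP queue socket. */
static int enable_tcp_keepalive(int fd)
{
	int on = 1, idle = 15, intvl = 5, cnt = 3;

	if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0)
		return -1;
	/* First probe after 15s idle, then every 5s, give up after 3 misses. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) < 0 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) < 0)
		return -1;
	return 0;
}

If the peer dies without a proper disconnect, the probes fail, TCP
resets the connection, and the normal teardown path reclaims the
controller.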
>>
>> Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
>> ---
>>  drivers/nvme/target/admin-cmd.c |  2 +-
>>  drivers/nvme/target/core.c      | 14 +++++++++++---
>>  drivers/nvme/target/nvmet.h     |  3 ++-
>>  3 files changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
>> index dca34489a1dc..53fbd5c193a1 100644
>> --- a/drivers/nvme/target/admin-cmd.c
>> +++ b/drivers/nvme/target/admin-cmd.c
>> @@ -729,7 +729,7 @@ u16 nvmet_set_feat_kato(struct nvmet_req *req)
>>
>>  	nvmet_stop_keep_alive_timer(req->sq->ctrl);
>>  	req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
>> -	nvmet_start_keep_alive_timer(req->sq->ctrl);
>> +	nvmet_start_keep_alive_timer(req->sq->ctrl, req);
>>
>>  	nvmet_set_result(req, req->sq->ctrl->kato);
>> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
>> index 957b39a82431..451192f7ad6a 100644
>> --- a/drivers/nvme/target/core.c
>> +++ b/drivers/nvme/target/core.c
>> @@ -395,10 +395,18 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
>>  	nvmet_ctrl_fatal_error(ctrl);
>>  }
>>
>> -void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
>> +void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
>>  {
>> -	if (unlikely(ctrl->kato == 0))
>> +	if (unlikely(ctrl->kato == 0)) {
>> +		if (req->ops->keep_alive)
>> +			pr_info("ctrl %d starts with transport keep-alive %s\n", ctrl->cntlid,
>> +				req->ops->keep_alive(req) ? "failed" : "succeeded");
>> +		else
>> +			pr_info("ctrl %d starts without NVMeOF or transport keep-alive\n",
>> +				ctrl->cntlid);
>> +
>>  		return;
>> +	}
>>
>>  	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
>>  		ctrl->cntlid, ctrl->kato);
>> @@ -1383,7 +1391,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
>>  	ctrl->err_counter = 0;
>>  	spin_lock_init(&ctrl->error_lock);
>>
>> -	nvmet_start_keep_alive_timer(ctrl);
>> +	nvmet_start_keep_alive_timer(ctrl, req);
>>
>>  	mutex_lock(&subsys->lock);
>>  	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>> index 559a15ccc322..de1785ce9fcd 100644
>> --- a/drivers/nvme/target/nvmet.h
>> +++ b/drivers/nvme/target/nvmet.h
>> @@ -305,6 +305,7 @@ struct nvmet_fabrics_ops {
>>  	u16 (*install_queue)(struct nvmet_sq *nvme_sq);
>>  	void (*discovery_chg)(struct nvmet_port *port);
>>  	u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
>> +	int (*keep_alive)(struct nvmet_req *req);
>>  };
>>
>>  #define NVMET_MAX_INLINE_BIOVEC 8
>> @@ -395,7 +396,7 @@ void nvmet_get_feat_async_event(struct nvmet_req *req);
>>  u16 nvmet_set_feat_kato(struct nvmet_req *req);
>>  u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
>>  void nvmet_execute_async_event(struct nvmet_req *req);
>> -void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl);
>> +void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl, struct nvmet_req *req);
>>  void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl);
>>  u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
>>
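
Patch 2/2 wires this op up for a concrete transport. For reference,
a rough sketch of what an nvmet-tcp implementation could look like
(the function name and probe timings here are illustrative only;
field names follow drivers/nvme/target/tcp.c, using the kernel's
sock_set_keepalive()/tcp_sock_set_*() helpers):

static int nvmet_tcp_keep_alive(struct nvmet_req *req)
{
	struct nvmet_tcp_cmd *cmd =
		container_of(req, struct nvmet_tcp_cmd, req);
	struct sock *sk = cmd->queue->sock->sk;
	int ret;

	/* First probe after 15s idle, then every 5s, fail after 3 misses. */
	ret = tcp_sock_set_keepidle(sk, 15);
	if (ret)
		return ret;
	ret = tcp_sock_set_keepintvl(sk, 5);
	if (ret)
		return ret;
	ret = tcp_sock_set_keepcnt(sk, 3);
	if (ret)
		return ret;
	sock_set_keepalive(sk);

	return 0;
}

The transport then only needs ".keep_alive = nvmet_tcp_keep_alive" in
its nvmet_fabrics_ops; when the host disappears, the failed probes
reset the socket and the queue teardown path reclaims the controller.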
--
zhenwei pi