[PATCH 5/7] nvme/nvme-fabrics: introduce nvmf_error_recovery_work API
Max Gurtovoy
mgurtovoy at nvidia.com
Tue Oct 19 06:17:12 PDT 2021
On 10/19/2021 3:43 PM, Sagi Grimberg wrote:
>
>
> On 10/18/21 4:40 PM, Max Gurtovoy wrote:
>> Error recovery work is duplicated in the RDMA and TCP transports. Move
>> this logic to common code. For that, introduce two new ctrl ops to
>> tear down the I/O and admin queues.
>>
>> Also update the RDMA/TCP transport drivers to use this API and remove
>> the duplicated code.
>>
>> Reviewed-by: Israel Rukshin <israelr at nvidia.com>
>> Signed-off-by: Max Gurtovoy <mgurtovoy at nvidia.com>
>> ---
>>  drivers/nvme/host/fabrics.c | 23 +++++++++++++++
>>  drivers/nvme/host/fabrics.h |  1 +
>>  drivers/nvme/host/nvme.h    |  4 +++
>>  drivers/nvme/host/rdma.c    | 56 ++++++++++++++++---------------------
>>  drivers/nvme/host/tcp.c     | 56 +++++++++++++++----------------------
>>  5 files changed, 75 insertions(+), 65 deletions(-)
>
> The dry diffstat numbers are not in your favor (75 insertions vs. 65 deletions)...
>
>>
>> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
>> index 2edd086fa922..544195369c97 100644
>> --- a/drivers/nvme/host/fabrics.c
>> +++ b/drivers/nvme/host/fabrics.c
>> @@ -493,6 +493,29 @@ void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl)
>> }
>> EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
>> +void nvmf_error_recovery_work(struct work_struct *work)
>> +{
>> +        struct nvme_ctrl *ctrl = container_of(work,
>> +                                struct nvme_ctrl, err_work);
>> +
>> +        nvme_stop_keep_alive(ctrl);
>> +        ctrl->ops->teardown_ctrl_io_queues(ctrl);
>> +        /* unquiesce to fail fast pending requests */
>> +        nvme_start_queues(ctrl);
>> +        ctrl->ops->teardown_ctrl_admin_queue(ctrl);
>> +        blk_mq_unquiesce_queue(ctrl->admin_q);
>> +
>> +        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
>> +                /* state change failure is ok if we started ctrl delete */
>> +                WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
>> +                             ctrl->state != NVME_CTRL_DELETING_NOIO);
>> +                return;
>> +        }
>> +
>> +        nvmf_reconnect_or_remove(ctrl);
>
> We need James to provide feedback on how this can be useful for FC.
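For FC the wiring could be as thin as it is for RDMA/TCP, something
along these lines (just a sketch -- the two teardown helpers below are
placeholders, not actual nvme-fc functions):

static void nvme_fc_teardown_ctrl_io_queues(struct nvme_ctrl *nctrl)
{
        struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);

        /* placeholder: abort outstanding I/O and disconnect the I/O
         * queues without freeing the tag set (remove == false semantics)
         */
        nvme_fc_teardown_io_queues(ctrl);
}

static void nvme_fc_teardown_ctrl_admin_queue(struct nvme_ctrl *nctrl)
{
        struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);

        /* placeholder for the FC admin queue teardown path */
        nvme_fc_teardown_admin_queue(ctrl);
}

plus plugging the two callbacks into nvme_fc_ctrl_ops. But agreed,
James should confirm whether FC's error recovery can be split this way.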
>
>> +}
>> +EXPORT_SYMBOL_GPL(nvmf_error_recovery_work);
>> +
>> void nvmf_error_recovery(struct nvme_ctrl *ctrl)
>> {
>>          if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
>> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
>> index 3d8ec7133fc8..8655eff74ed0 100644
>> --- a/drivers/nvme/host/fabrics.h
>> +++ b/drivers/nvme/host/fabrics.h
>> @@ -190,6 +190,7 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
>> bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
>> void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl);
>> void nvmf_error_recovery(struct nvme_ctrl *ctrl);
>> +void nvmf_error_recovery_work(struct work_struct *work);
>> bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
>>                  struct nvmf_ctrl_options *opts);
>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index f9e1ce93d61d..1573edf6e97f 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -493,6 +493,10 @@ struct nvme_ctrl_ops {
>>          void (*submit_async_event)(struct nvme_ctrl *ctrl);
>>          void (*delete_ctrl)(struct nvme_ctrl *ctrl);
>>          int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
>> +
>> +        /* Fabrics only */
>> +        void (*teardown_ctrl_io_queues)(struct nvme_ctrl *ctrl);
>> +        void (*teardown_ctrl_admin_queue)(struct nvme_ctrl *ctrl);
>
> It becomes strange that we have a teardown callback without a matching setup callback...
We can do it incrementally.
It wouldn't be the first time we've done that :)
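If/when we want full symmetry, the ops can grow matching setup
callbacks in a follow-up, e.g. something like (a sketch of a possible
next step, not part of this series):

struct nvme_ctrl_ops {
        ...
        /* Fabrics only */
        int (*setup_ctrl_io_queues)(struct nvme_ctrl *ctrl, bool new);
        int (*setup_ctrl_admin_queue)(struct nvme_ctrl *ctrl, bool new);
        void (*teardown_ctrl_io_queues)(struct nvme_ctrl *ctrl);
        void (*teardown_ctrl_admin_queue)(struct nvme_ctrl *ctrl);
};

mirroring the "bool new" argument that nvme_rdma_configure_io_queues()
and nvme_tcp_configure_io_queues() already take today. That would let
us unify the reconnect path as well.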
>
>> };
>> /*
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 1c57e371af61..f4e4ebf673d2 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -1031,6 +1031,11 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>          nvme_rdma_destroy_admin_queue(ctrl, remove);
>> }
>> +static void _nvme_rdma_teardown_admin_queue(struct nvme_ctrl *ctrl)
>> +{
>> +        nvme_rdma_teardown_admin_queue(to_rdma_ctrl(ctrl), false);
>> +}
>> +
>> static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
>>                  bool remove)
>> {
>> @@ -1046,6 +1051,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
>>          }
>> }
>> +static void _nvme_rdma_teardown_io_queues(struct nvme_ctrl *ctrl)
>> +{
>> +        nvme_rdma_teardown_io_queues(to_rdma_ctrl(ctrl), false);
>> +}
>> +
>> static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
>> {
>> struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
>> @@ -1164,27 +1174,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
>>          nvmf_reconnect_or_remove(&ctrl->ctrl);
>> }
>> -static void nvme_rdma_error_recovery_work(struct work_struct *work)
>> -{
>> -        struct nvme_rdma_ctrl *ctrl = container_of(work,
>> -                        struct nvme_rdma_ctrl, ctrl.err_work);
>> -
>> -        nvme_stop_keep_alive(&ctrl->ctrl);
>> -        nvme_rdma_teardown_io_queues(ctrl, false);
>> -        nvme_start_queues(&ctrl->ctrl);
>> -        nvme_rdma_teardown_admin_queue(ctrl, false);
>> -        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
>> -
>> -        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
>> -                /* state change failure is ok if we started ctrl delete */
>> -                WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
>> -                             ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
>> -                return;
>> -        }
>> -
>> -        nvmf_reconnect_or_remove(&ctrl->ctrl);
>> -}
>> -
>> static void nvme_rdma_end_request(struct nvme_rdma_request *req)
>> {
>>          struct request *rq = blk_mq_rq_from_pdu(req);
>> @@ -2240,16 +2229,19 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
>> }
>> static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
>> - .name = "rdma",
>> - .module = THIS_MODULE,
>> - .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
>> - .reg_read32 = nvmf_reg_read32,
>> - .reg_read64 = nvmf_reg_read64,
>> - .reg_write32 = nvmf_reg_write32,
>> - .free_ctrl = nvme_rdma_free_ctrl,
>> - .submit_async_event = nvme_rdma_submit_async_event,
>> - .delete_ctrl = nvme_rdma_delete_ctrl,
>> - .get_address = nvmf_get_address,
>> + .name = "rdma",
>> + .module = THIS_MODULE,
>> + .flags = NVME_F_FABRICS |
>> + NVME_F_METADATA_SUPPORTED,
>> + .reg_read32 = nvmf_reg_read32,
>> + .reg_read64 = nvmf_reg_read64,
>> + .reg_write32 = nvmf_reg_write32,
>> + .free_ctrl = nvme_rdma_free_ctrl,
>> + .submit_async_event = nvme_rdma_submit_async_event,
>> + .delete_ctrl = nvme_rdma_delete_ctrl,
>> + .get_address = nvmf_get_address,
>> + .teardown_ctrl_io_queues = _nvme_rdma_teardown_io_queues,
>> + .teardown_ctrl_admin_queue = _nvme_rdma_teardown_admin_queue,
>> };
>> /*
>> @@ -2329,7 +2321,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>>          INIT_DELAYED_WORK(&ctrl->ctrl.connect_work,
>>                          nvme_rdma_reconnect_ctrl_work);
>> -        INIT_WORK(&ctrl->ctrl.err_work, nvme_rdma_error_recovery_work);
>> +        INIT_WORK(&ctrl->ctrl.err_work, nvmf_error_recovery_work);
>
> This initialization needs to move to the core or fabrics lib.
It's done in the next patches.
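The end state is along these lines (a sketch only -- the helper name
below is hypothetical; the actual one is introduced later in the
series):

/* fabrics.c -- called from the transports' create_ctrl paths */
void nvmf_init_ctrl(struct nvme_ctrl *ctrl)
{
        INIT_WORK(&ctrl->err_work, nvmf_error_recovery_work);
}
EXPORT_SYMBOL_GPL(nvmf_init_ctrl);

so no transport has to know which work function backs err_work.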