[PATCH v3 7/9] nvme-rdma: serialize controller teardown sequences

Sagi Grimberg sagi at grimberg.me
Thu Aug 20 18:16:39 EDT 2020


>> @@ -997,6 +998,7 @@ static int nvme_rdma_configure_io_queues(struct 
>> nvme_rdma_ctrl *ctrl, bool new)
>>   static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>           bool remove)
>>   {
>> +    mutex_lock(&ctrl->teardown_lock);
>>       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
>>       nvme_rdma_stop_queue(&ctrl->queues[0]);
>>       if (ctrl->ctrl.admin_tagset) {
>> @@ -1007,11 +1009,13 @@ static void 
>> nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>       if (remove)
>>           blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
>>       nvme_rdma_destroy_admin_queue(ctrl, remove);
>> +    mutex_unlock(&ctrl->teardown_lock);
>>   }
>>   static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
>>           bool remove)
>>   {
>> +    mutex_lock(&ctrl->teardown_lock);
>>       if (ctrl->ctrl.queue_count > 1) {
>>           nvme_start_freeze(&ctrl->ctrl);
>>           nvme_stop_queues(&ctrl->ctrl);
>> @@ -1025,6 +1029,7 @@ static void nvme_rdma_teardown_io_queues(struct 
>> nvme_rdma_ctrl *ctrl,
>>               nvme_start_queues(&ctrl->ctrl);
>>           nvme_rdma_destroy_io_queues(ctrl, remove);
>>       }
>> +    mutex_unlock(&ctrl->teardown_lock);
>>   }
>>   static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
>> @@ -2278,6 +2283,7 @@ static struct nvme_ctrl 
>> *nvme_rdma_create_ctrl(struct device *dev,
>>           return ERR_PTR(-ENOMEM);
>>       ctrl->ctrl.opts = opts;
>>       INIT_LIST_HEAD(&ctrl->list);
>> +    mutex_init(&ctrl->teardown_lock);
>>       if (!(opts->mask & NVMF_OPT_TRSVCID)) {
>>           opts->trsvcid =
> 
> Q: so you don't believe there's any conflict from these teardown paths 
> (possibly invoked by sysfs delete ctrl)  vs  a reconnect (thus 
> nvme_rdma_setup_ctrl) encountering a failure during controller init, 
> thus it hits the destroy_io and destroy_admin exit paths - which call 
> nvme_rdma_destroy_io_queues and stop/destroy_admin_queue - which can be 
> simultaneous to the teardowns and without the mutex ?

Not really, the first thing rdma/tcp does in .delete_ctrl is to
cancel_work_sync the connect_work and the err_work, and any other
invocation of these works cannot occur because the state machine
won't allow it. So that serialization should be sufficient, unless I'm
missing something...



More information about the Linux-nvme mailing list