[PATCHv6] nvme: allow to re-attach namespaces after all paths are down

Hannes Reinecke hare at suse.de
Mon Jun 21 00:33:24 PDT 2021


On 6/21/21 8:38 AM, Christoph Hellwig wrote:
> On Wed, Jun 09, 2021 at 05:01:18PM +0200, Hannes Reinecke wrote:
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 177cae44b612..6d7c2958b3e2 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -566,6 +566,9 @@ static void nvme_free_ns_head(struct kref *ref)
>>   	struct nvme_ns_head *head =
>>   		container_of(ref, struct nvme_ns_head, ref);
>>   
>> +	mutex_lock(&head->subsys->lock);
>> +	list_del_init(&head->entry);
>> +	mutex_unlock(&head->subsys->lock);
>>   	nvme_mpath_remove_disk(head);
>>   	ida_simple_remove(&head->subsys->ns_ida, head->instance);
>>   	cleanup_srcu_struct(&head->srcu);
>> @@ -3806,8 +3809,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
>>    out_unlink_ns:
>>   	mutex_lock(&ctrl->subsys->lock);
>>   	list_del_rcu(&ns->siblings);
>> -	if (list_empty(&ns->head->list))
>> -		list_del_init(&ns->head->entry);
>>   	mutex_unlock(&ctrl->subsys->lock);
>>   	nvme_put_ns_head(ns->head);
>>    out_free_queue:
>> @@ -3828,8 +3829,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
>>   
>>   	mutex_lock(&ns->ctrl->subsys->lock);
>>   	list_del_rcu(&ns->siblings);
>> -	if (list_empty(&ns->head->list))
>> -		list_del_init(&ns->head->entry);
>>   	mutex_unlock(&ns->ctrl->subsys->lock);
>>   
>>   	synchronize_rcu(); /* guarantee not available in head->list */
>> @@ -3849,7 +3848,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
>>   	list_del_init(&ns->list);
>>   	up_write(&ns->ctrl->namespaces_rwsem);
>>   
>> -	nvme_mpath_check_last_path(ns);
>> +	nvme_mpath_check_last_path(ns->head);
>>   	nvme_put_ns(ns);
>>   }
>>   
>> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
>> index 23573fe3fc7d..31153f6ec582 100644
>> --- a/drivers/nvme/host/multipath.c
>> +++ b/drivers/nvme/host/multipath.c
>> @@ -266,6 +266,8 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
>>   	int node = numa_node_id();
>>   	struct nvme_ns *ns;
>>   
>> +	if (!(head->disk->flags & GENHD_FL_UP))
>> +		return NULL;
>>   	ns = srcu_dereference(head->current_path[node], &head->srcu);
>>   	if (unlikely(!ns))
>>   		return __nvme_find_path(head, node);
>> @@ -281,6 +283,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
>>   {
>>   	struct nvme_ns *ns;
>>   
>> +	if (!(head->disk->flags & GENHD_FL_UP))
>> +		return false;
>>   	list_for_each_entry_rcu(ns, &head->list, siblings) {
>>   		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
>>   			continue;
>> @@ -771,20 +775,36 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
>>   #endif
>>   }
>>   
>> -void nvme_mpath_remove_disk(struct nvme_ns_head *head)
>> +void nvme_mpath_check_last_path(struct nvme_ns_head *head)
>>   {
>> +	bool last_path = false;
>>   	if (!head->disk)
>>   		return;
>> +
>> +	/* Synchronize with nvme_init_ns_head() */
>> +	mutex_lock(&head->subsys->lock);
>> +	if (list_empty(&head->list))
>> +		last_path = true;
>> +	mutex_unlock(&head->subsys->lock);
>> +	if (last_path) {
>> +		kblockd_schedule_work(&head->requeue_work);
>> +		if (head->disk->flags & GENHD_FL_UP) {
>> +			nvme_cdev_del(&head->cdev, &head->cdev_device);
>> +			del_gendisk(head->disk);
>> +		}
>>   	}
>> +}
>> +
>> +void nvme_mpath_remove_disk(struct nvme_ns_head *head)
>> +{
>> +	if (!head->disk)
>> +		return;
>>   	blk_set_queue_dying(head->disk->queue);
>>   	/* make sure all pending bios are cleaned up */
>>   	kblockd_schedule_work(&head->requeue_work);
>>   	flush_work(&head->requeue_work);
>>   	blk_cleanup_queue(head->disk->queue);
>> -	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
>> +	if (!test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
>>   		/*
>>   		 * if device_add_disk wasn't called, prevent
>>   		 * disk release to put a bogus reference on the
> 
> So if a nvme_mpath_set_live comes in between nvme_mpath_check_last_path
> and nvme_mpath_remove_disk we'll end up without a gendisk still, don't we?
> 
I can't see how we can end up there.
If nvme_mpath_set_live() is called, the caller must hold a reference on the
ns_head; consequently nvme_mpath_remove_disk() will not be called for that
ns_head, as that function is invoked from the kref release callback only
once all references have been dropped.

Am I missing something?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare at suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer



More information about the Linux-nvme mailing list