[PATCH V4 2/2] nvme-core: use xarray for ctrl ns tracking

Chaitanya Kulkarni chaitanya.kulkarni at wdc.com
Sun Jul 19 23:32:03 EDT 2020


This patch replaces the ctrl->namespaces tracking from linked list to
xarray and improves the performance. For host even though
nvme_find_get_ns() doesn't fall into the fast path yet it does for
NVMeOF passthru patch-series which is currently under review. This
prepares us to improve performance for future NVMeOF passthru backend
since nvme_find_get_ns() uses same data structure as it does in target
to find namespace in I/O patch from nsid specified in the nvme_rw_cmd.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
---
 drivers/nvme/host/core.c      | 184 ++++++++++++++++------------------
 drivers/nvme/host/multipath.c |  15 ++-
 drivers/nvme/host/nvme.h      |   5 +-
 3 files changed, 96 insertions(+), 108 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f49085bcaa42..3f09774d2d54 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1426,9 +1426,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		if (_nvme_revalidate_disk(ns->disk))
 			nvme_set_queue_dying(ns);
 		else if (blk_queue_is_zoned(ns->disk->queue)) {
@@ -1440,7 +1440,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
 			 */
 			*effects |= NVME_CMD_EFFECTS_NCC;
 		}
-	up_read(&ctrl->namespaces_rwsem);
 }
 
 static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -3195,34 +3194,33 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 {
 	struct nvme_ns *ns;
-	int ret;
+	unsigned long idx;
+	int ret = -EINVAL;
+	int count = 0;
 
-	down_read(&ctrl->namespaces_rwsem);
-	if (list_empty(&ctrl->namespaces)) {
+	if (xa_empty(&ctrl->namespaces)) {
 		ret = -ENOTTY;
-		goto out_unlock;
+		goto out;
 	}
-
-	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
-	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
-		dev_warn(ctrl->device,
-			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
-		ret = -EINVAL;
-		goto out_unlock;
+	xa_for_each(&ctrl->namespaces, idx, ns) {
+		if (count > 0)
+			goto err;
+		count++;
 	}
 
 	dev_warn(ctrl->device,
 		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
 	kref_get(&ns->kref);
-	up_read(&ctrl->namespaces_rwsem);
 
 	ret = nvme_user_cmd(ctrl, ns, argp);
 	nvme_put_ns(ns);
-	return ret;
 
-out_unlock:
-	up_read(&ctrl->namespaces_rwsem);
+out:
 	return ret;
+err:
+	dev_warn(ctrl->device,
+			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+	goto out;
 }
 
 static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
@@ -3790,31 +3788,16 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 	return ret;
 }
 
-static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
-	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
-	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
-
-	return nsa->head->ns_id - nsb->head->ns_id;
-}
-
 static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
-	struct nvme_ns *ns, *ret = NULL;
+	struct nvme_ns *ns;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
-		if (ns->head->ns_id == nsid) {
-			if (!kref_get_unless_zero(&ns->kref))
-				continue;
-			ret = ns;
-			break;
-		}
-		if (ns->head->ns_id > nsid)
-			break;
-	}
-	up_read(&ctrl->namespaces_rwsem);
-	return ret;
+	rcu_read_lock();
+	ns = xa_load(&ctrl->namespaces, nsid);
+	ns = ns && kref_get_unless_zero(&ns->kref) ? ns : NULL;
+	rcu_read_unlock();
+
+	return ns;
 }
 
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
@@ -3884,9 +3867,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		}
 	}
 
-	down_write(&ctrl->namespaces_rwsem);
-	list_add_tail(&ns->list, &ctrl->namespaces);
-	up_write(&ctrl->namespaces_rwsem);
+	ret = xa_insert(&ctrl->namespaces, nsid, ns, GFP_KERNEL);
+	if (ret)
+		goto out_unregister_nvm;
 
 	nvme_get_ctrl(ctrl);
 
@@ -3897,6 +3880,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	kfree(id);
 
 	return;
+ out_unregister_nvm:
+	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1)
+		nvme_nvm_unregister(ns);
  out_put_disk:
 	/* prevent double queue cleanup */
 	ns->disk->queue = NULL;
@@ -3929,6 +3915,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		list_del_init(&ns->head->entry);
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
+	xa_erase(&ns->ctrl->namespaces, ns->head->ns_id);
 	synchronize_rcu(); /* guarantee not available in head->list */
 	nvme_mpath_clear_current_path(ns);
 	synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
@@ -3940,10 +3927,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 			blk_integrity_unregister(ns->disk);
 	}
 
-	down_write(&ns->ctrl->namespaces_rwsem);
-	list_del_init(&ns->list);
-	up_write(&ns->ctrl->namespaces_rwsem);
-
 	nvme_mpath_check_last_path(ns);
 	nvme_put_ns(ns);
 }
@@ -3974,19 +3957,31 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					unsigned nsid)
 {
-	struct nvme_ns *ns, *next;
-	LIST_HEAD(rm_list);
+	struct xarray rm_array;
+	unsigned long tnsid;
+	struct nvme_ns *ns;
+	unsigned long idx;
+	int ret;
 
-	down_write(&ctrl->namespaces_rwsem);
-	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
-		if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
-			list_move_tail(&ns->list, &rm_list);
+	xa_init(&rm_array);
+
+	xa_lock(&ctrl->namespaces);
+	xa_for_each(&ctrl->namespaces, idx, ns) {
+		tnsid = ns->head->ns_id;
+		if (tnsid > nsid || test_bit(NVME_NS_DEAD, &ns->flags)) {
+			__xa_erase(&ctrl->namespaces, tnsid);
+			xa_unlock(&ctrl->namespaces);
+			/* Even if insert fails keep going */
+			ret = xa_insert(&rm_array, nsid, ns, GFP_KERNEL);
+			if (ret)
+				pr_err("xa_insert %d\n", ret);
+			xa_lock(&ctrl->namespaces);
+		}
 	}
-	up_write(&ctrl->namespaces_rwsem);
+	xa_unlock(&ctrl->namespaces);
 
-	list_for_each_entry_safe(ns, next, &rm_list, list)
+	xa_for_each(&rm_array, idx, ns)
 		nvme_ns_remove(ns);
-
 }
 
 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
@@ -4084,10 +4079,6 @@ static void nvme_scan_work(struct work_struct *work)
 	if (nvme_scan_ns_list(ctrl) != 0)
 		nvme_scan_ns_sequential(ctrl);
 	mutex_unlock(&ctrl->scan_lock);
-
-	down_write(&ctrl->namespaces_rwsem);
-	list_sort(NULL, &ctrl->namespaces, ns_cmp);
-	up_write(&ctrl->namespaces_rwsem);
 }
 
 /*
@@ -4097,8 +4088,12 @@ static void nvme_scan_work(struct work_struct *work)
  */
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
-	struct nvme_ns *ns, *next;
-	LIST_HEAD(ns_list);
+	struct xarray rm_array;
+	struct nvme_ns *ns;
+	unsigned long idx;
+	int ret;
+
+	xa_init(&rm_array);
 
 	/*
 	 * make sure to requeue I/O to all namespaces as these
@@ -4119,11 +4114,18 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	if (ctrl->state == NVME_CTRL_DEAD)
 		nvme_kill_queues(ctrl);
 
-	down_write(&ctrl->namespaces_rwsem);
-	list_splice_init(&ctrl->namespaces, &ns_list);
-	up_write(&ctrl->namespaces_rwsem);
+	xa_lock(&ctrl->namespaces);
+	xa_for_each(&ctrl->namespaces, idx, ns) {
+		__xa_erase(&ctrl->namespaces, ns->head->ns_id);
+		xa_unlock(&ctrl->namespaces);
+		ret = xa_insert(&rm_array, ns->head->ns_id, ns, GFP_KERNEL);
+		if (ret)
+			pr_err("xa_insert %d\n", ret);
+		xa_lock(&ctrl->namespaces);
+	}
+	xa_unlock(&ctrl->namespaces);
 
-	list_for_each_entry_safe(ns, next, &ns_list, list)
+	xa_for_each(&rm_array, idx, ns)
 		nvme_ns_remove(ns);
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
@@ -4344,6 +4346,10 @@ static void nvme_free_ctrl(struct device *dev)
 	if (subsys && ctrl->instance != subsys->instance)
 		ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 
+	WARN_ON_ONCE(!xa_empty(&ctrl->namespaces));
+
+	xa_destroy(&ctrl->namespaces);
+
 	list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
 		list_del(&cel->entry);
 		kfree(cel);
@@ -4378,9 +4384,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	ctrl->state = NVME_CTRL_NEW;
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
-	INIT_LIST_HEAD(&ctrl->namespaces);
 	INIT_LIST_HEAD(&ctrl->cels);
-	init_rwsem(&ctrl->namespaces_rwsem);
+	xa_init(&ctrl->namespaces);
 	ctrl->dev = dev;
 	ctrl->ops = ops;
 	ctrl->quirks = quirks;
@@ -4460,98 +4465,87 @@ EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
-
-	down_read(&ctrl->namespaces_rwsem);
+	unsigned long idx;
 
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
 		blk_mq_unquiesce_queue(ctrl->admin_q);
 
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		nvme_set_queue_dying(ns);
-
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);
 
 void nvme_unfreeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		blk_mq_unfreeze_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
 
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	xa_for_each(&ctrl->namespaces, idx, ns) {
 		timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
 		if (timeout <= 0)
 			break;
 	}
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
 
 void nvme_wait_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		blk_mq_freeze_queue_wait(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze);
 
 void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		blk_freeze_queue_start(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		blk_mq_quiesce_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_queues);
 
 void nvme_start_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		blk_mq_unquiesce_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
-
 void nvme_sync_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		blk_sync_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
 
 	if (ctrl->admin_q)
 		blk_sync_queue(ctrl->admin_q);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 74bad4e3d377..af486864a1dc 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -115,13 +115,12 @@ bool nvme_failover_req(struct request *req)
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	xa_for_each(&ctrl->namespaces, idx, ns) {
 		if (ns->head->disk)
 			kblockd_schedule_work(&ns->head->requeue_work);
 	}
-	up_read(&ctrl->namespaces_rwsem);
 }
 
 static const char *nvme_ana_state_names[] = {
@@ -155,13 +154,12 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	unsigned long idx;
 
 	mutex_lock(&ctrl->scan_lock);
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	xa_for_each(&ctrl->namespaces, idx, ns)
 		if (nvme_mpath_clear_current_path(ns))
 			kblockd_schedule_work(&ns->head->requeue_work);
-	up_read(&ctrl->namespaces_rwsem);
 	mutex_unlock(&ctrl->scan_lock);
 }
 
@@ -495,6 +493,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
 	unsigned *nr_change_groups = data;
 	struct nvme_ns *ns;
+	unsigned long idx;
 
 	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
 			le32_to_cpu(desc->grpid),
@@ -506,8 +505,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	if (!nr_nsids)
 		return 0;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	xa_for_each(&ctrl->namespaces, idx, ns) {
 		unsigned nsid = le32_to_cpu(desc->nsids[n]);
 
 		if (ns->head->ns_id < nsid)
@@ -517,7 +515,6 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 		if (++n == nr_nsids)
 			break;
 	}
-	up_read(&ctrl->namespaces_rwsem);
 	return 0;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 13ca90bcd352..d1b9761aff9c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -212,8 +212,7 @@ struct nvme_ctrl {
 	int numa_node;
 	struct blk_mq_tag_set *tagset;
 	struct blk_mq_tag_set *admin_tagset;
-	struct list_head namespaces;
-	struct rw_semaphore namespaces_rwsem;
+	struct xarray namespaces;
 	struct device ctrl_device;
 	struct device *device;	/* char device */
 	struct cdev cdev;
@@ -388,8 +387,6 @@ enum nvme_ns_features {
 };
 
 struct nvme_ns {
-	struct list_head list;
-
 	struct nvme_ctrl *ctrl;
 	struct request_queue *queue;
 	struct gendisk *disk;
-- 
2.26.0




More information about the Linux-nvme mailing list