[PATCH V4 2/2] nvme-core: use xarray for ctrl ns tracking
Chaitanya Kulkarni
chaitanya.kulkarni at wdc.com
Sun Jul 19 23:32:03 EDT 2020
This patch replaces the linked list used to track namespaces on a
controller (ctrl->namespaces) with an xarray and improves lookup
performance. On the host, nvme_find_get_ns() is not in the fast path
yet, but it is for the NVMeOF passthru patch-series currently under
review. This prepares us to improve performance for the future NVMeOF
passthru backend, since nvme_find_get_ns() will then use the same data
structure the target uses to resolve a namespace in the I/O path from
the nsid specified in nvme_rw_cmd.
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
---
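A minimal sketch of the lookup pattern this patch adopts in
nvme_find_get_ns(), assuming only the generic XArray API; the helper
name ns_lookup_sketch() is made up for illustration:

    /*
     * xa_load() is RCU-safe, so a plain rcu_read_lock() replaces the
     * old namespaces_rwsem read lock; kref_get_unless_zero() skips a
     * namespace whose last reference is already gone.
     */
    static struct nvme_ns *ns_lookup_sketch(struct nvme_ctrl *ctrl,
                                            unsigned int nsid)
    {
            struct nvme_ns *ns;

            rcu_read_lock();
            ns = xa_load(&ctrl->namespaces, nsid);
            if (ns && !kref_get_unless_zero(&ns->kref))
                    ns = NULL;
            rcu_read_unlock();

            return ns;
    }

Compared with the list walk it replaces, this is a single radix-tree
lookup instead of an O(n) traversal under a rwsem.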
drivers/nvme/host/core.c | 184 ++++++++++++++++------------------
drivers/nvme/host/multipath.c | 15 ++-
drivers/nvme/host/nvme.h | 5 +-
3 files changed, 96 insertions(+), 108 deletions(-)
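The removal paths in the diff below (nvme_remove_invalid_namespaces()
and nvme_remove_namespaces()) erase entries under xa_lock() but must
drop the lock around xa_insert() into a local stash, since GFP_KERNEL
allocations may sleep. A sketch of that pattern, with the helper name
and the rm_stash xarray invented for illustration:

    static void ns_stash_sketch(struct nvme_ctrl *ctrl, unsigned int nsid)
    {
            struct xarray rm_stash;
            struct nvme_ns *ns;
            unsigned long idx;

            xa_init(&rm_stash);
            xa_lock(&ctrl->namespaces);
            xa_for_each(&ctrl->namespaces, idx, ns) {
                    if (ns->head->ns_id <= nsid)
                            continue;
                    __xa_erase(&ctrl->namespaces, ns->head->ns_id);
                    /* xa_insert() may sleep with GFP_KERNEL: drop the lock */
                    xa_unlock(&ctrl->namespaces);
                    if (xa_insert(&rm_stash, ns->head->ns_id, ns, GFP_KERNEL))
                            pr_err("failed to stash namespace for removal\n");
                    xa_lock(&ctrl->namespaces);
            }
            xa_unlock(&ctrl->namespaces);

            /* each stashed namespace now goes through nvme_ns_remove() */
            xa_for_each(&rm_stash, idx, ns)
                    nvme_ns_remove(ns);
            xa_destroy(&rm_stash);
    }

xa_for_each() restarts from the last index on every iteration, so it
tolerates the erase and the lock drop inside the loop body.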
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f49085bcaa42..3f09774d2d54 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1426,9 +1426,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
if (_nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
else if (blk_queue_is_zoned(ns->disk->queue)) {
@@ -1440,7 +1440,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
*/
*effects |= NVME_CMD_EFFECTS_NCC;
}
- up_read(&ctrl->namespaces_rwsem);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -3195,34 +3194,33 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
struct nvme_ns *ns;
- int ret;
+ unsigned long idx;
+ int ret = -EINVAL;
+ int count = 0;
- down_read(&ctrl->namespaces_rwsem);
- if (list_empty(&ctrl->namespaces)) {
+ if (xa_empty(&ctrl->namespaces)) {
ret = -ENOTTY;
- goto out_unlock;
+ goto out;
}
-
- ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
- if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
- dev_warn(ctrl->device,
- "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
- ret = -EINVAL;
- goto out_unlock;
+ xa_for_each(&ctrl->namespaces, idx, ns) {
+ if (count > 0)
+ goto err;
+ count++;
}
dev_warn(ctrl->device,
"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
kref_get(&ns->kref);
- up_read(&ctrl->namespaces_rwsem);
ret = nvme_user_cmd(ctrl, ns, argp);
nvme_put_ns(ns);
- return ret;
-out_unlock:
- up_read(&ctrl->namespaces_rwsem);
+out:
return ret;
+err:
+ dev_warn(ctrl->device,
+ "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+ goto out;
}
static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
@@ -3790,31 +3788,16 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
return ret;
}
-static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
- struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
- struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
-
- return nsa->head->ns_id - nsb->head->ns_id;
-}
-
static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
- struct nvme_ns *ns, *ret = NULL;
+ struct nvme_ns *ns;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->head->ns_id == nsid) {
- if (!kref_get_unless_zero(&ns->kref))
- continue;
- ret = ns;
- break;
- }
- if (ns->head->ns_id > nsid)
- break;
- }
- up_read(&ctrl->namespaces_rwsem);
- return ret;
+ rcu_read_lock();
+ ns = xa_load(&ctrl->namespaces, nsid);
+ ns = ns && kref_get_unless_zero(&ns->kref) ? ns : NULL;
+ rcu_read_unlock();
+
+ return ns;
}
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
@@ -3884,9 +3867,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
}
}
- down_write(&ctrl->namespaces_rwsem);
- list_add_tail(&ns->list, &ctrl->namespaces);
- up_write(&ctrl->namespaces_rwsem);
+ ret = xa_insert(&ctrl->namespaces, nsid, ns, GFP_KERNEL);
+ if (ret)
+ goto out_unregister_nvm;
nvme_get_ctrl(ctrl);
@@ -3897,6 +3880,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
kfree(id);
return;
+ out_unregister_nvm:
+ if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1)
+ nvme_nvm_unregister(ns);
out_put_disk:
/* prevent double queue cleanup */
ns->disk->queue = NULL;
@@ -3929,6 +3915,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
list_del_init(&ns->head->entry);
mutex_unlock(&ns->ctrl->subsys->lock);
+ xa_erase(&ns->ctrl->namespaces, ns->head->ns_id);
synchronize_rcu(); /* guarantee not available in head->list */
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
@@ -3940,10 +3927,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
blk_integrity_unregister(ns->disk);
}
- down_write(&ns->ctrl->namespaces_rwsem);
- list_del_init(&ns->list);
- up_write(&ns->ctrl->namespaces_rwsem);
-
nvme_mpath_check_last_path(ns);
nvme_put_ns(ns);
}
@@ -3974,19 +3957,31 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid)
{
- struct nvme_ns *ns, *next;
- LIST_HEAD(rm_list);
+ struct xarray rm_array;
+ unsigned long tnsid;
+ struct nvme_ns *ns;
+ unsigned long idx;
+ int ret;
- down_write(&ctrl->namespaces_rwsem);
- list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
- if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
- list_move_tail(&ns->list, &rm_list);
+ xa_init(&rm_array);
+
+ xa_lock(&ctrl->namespaces);
+ xa_for_each(&ctrl->namespaces, idx, ns) {
+ tnsid = ns->head->ns_id;
+ if (tnsid > nsid || test_bit(NVME_NS_DEAD, &ns->flags)) {
+ __xa_erase(&ctrl->namespaces, tnsid);
+ xa_unlock(&ctrl->namespaces);
+ /* Even if the insert fails, keep going */
+ ret = xa_insert(&rm_array, tnsid, ns, GFP_KERNEL);
+ if (ret)
+ pr_err("xa_insert %d\n", ret);
+ xa_lock(&ctrl->namespaces);
+ }
}
- up_write(&ctrl->namespaces_rwsem);
+ xa_unlock(&ctrl->namespaces);
- list_for_each_entry_safe(ns, next, &rm_list, list)
+ xa_for_each(&rm_array, idx, ns)
nvme_ns_remove(ns);
-
}
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
@@ -4084,10 +4079,6 @@ static void nvme_scan_work(struct work_struct *work)
if (nvme_scan_ns_list(ctrl) != 0)
nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock);
-
- down_write(&ctrl->namespaces_rwsem);
- list_sort(NULL, &ctrl->namespaces, ns_cmp);
- up_write(&ctrl->namespaces_rwsem);
}
/*
@@ -4097,8 +4088,12 @@ static void nvme_scan_work(struct work_struct *work)
*/
void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns, *next;
- LIST_HEAD(ns_list);
+ struct xarray rm_array;
+ struct nvme_ns *ns;
+ unsigned long idx;
+ int ret;
+
+ xa_init(&rm_array);
/*
* make sure to requeue I/O to all namespaces as these
@@ -4119,11 +4114,18 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
if (ctrl->state == NVME_CTRL_DEAD)
nvme_kill_queues(ctrl);
- down_write(&ctrl->namespaces_rwsem);
- list_splice_init(&ctrl->namespaces, &ns_list);
- up_write(&ctrl->namespaces_rwsem);
+ xa_lock(&ctrl->namespaces);
+ xa_for_each(&ctrl->namespaces, idx, ns) {
+ __xa_erase(&ctrl->namespaces, ns->head->ns_id);
+ xa_unlock(&ctrl->namespaces);
+ ret = xa_insert(&rm_array, ns->head->ns_id, ns, GFP_KERNEL);
+ if (ret)
+ pr_err("xa_insert %d\n", ret);
+ xa_lock(&ctrl->namespaces);
+ }
+ xa_unlock(&ctrl->namespaces);
- list_for_each_entry_safe(ns, next, &ns_list, list)
+ xa_for_each(&rm_array, idx, ns)
nvme_ns_remove(ns);
}
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
@@ -4344,6 +4346,10 @@ static void nvme_free_ctrl(struct device *dev)
if (subsys && ctrl->instance != subsys->instance)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+ WARN_ON_ONCE(!xa_empty(&ctrl->namespaces));
+
+ xa_destroy(&ctrl->namespaces);
+
list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
list_del(&cel->entry);
kfree(cel);
@@ -4378,9 +4384,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->state = NVME_CTRL_NEW;
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
- INIT_LIST_HEAD(&ctrl->namespaces);
INIT_LIST_HEAD(&ctrl->cels);
- init_rwsem(&ctrl->namespaces_rwsem);
+ xa_init(&ctrl->namespaces);
ctrl->dev = dev;
ctrl->ops = ops;
ctrl->quirks = quirks;
@@ -4460,98 +4465,87 @@ EXPORT_SYMBOL_GPL(nvme_init_ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
-
- down_read(&ctrl->namespaces_rwsem);
+ unsigned long idx;
/* Forcibly unquiesce queues to avoid blocking dispatch */
if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
blk_mq_unquiesce_queue(ctrl->admin_q);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
nvme_set_queue_dying(ns);
-
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);
void nvme_unfreeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
blk_mq_unfreeze_queue(ns->queue);
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
+ xa_for_each(&ctrl->namespaces, idx, ns) {
timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
if (timeout <= 0)
break;
}
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
void nvme_wait_freeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
blk_mq_freeze_queue_wait(ns->queue);
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze);
void nvme_start_freeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
blk_freeze_queue_start(ns->queue);
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_start_freeze);
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
blk_mq_quiesce_queue(ns->queue);
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_stop_queues);
void nvme_start_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
blk_mq_unquiesce_queue(ns->queue);
- up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
-
void nvme_sync_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
blk_sync_queue(ns->queue);
- up_read(&ctrl->namespaces_rwsem);
if (ctrl->admin_q)
blk_sync_queue(ctrl->admin_q);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 74bad4e3d377..af486864a1dc 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -115,13 +115,12 @@ bool nvme_failover_req(struct request *req)
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
+ xa_for_each(&ctrl->namespaces, idx, ns) {
if (ns->head->disk)
kblockd_schedule_work(&ns->head->requeue_work);
}
- up_read(&ctrl->namespaces_rwsem);
}
static const char *nvme_ana_state_names[] = {
@@ -155,13 +154,12 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ unsigned long idx;
mutex_lock(&ctrl->scan_lock);
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ xa_for_each(&ctrl->namespaces, idx, ns)
if (nvme_mpath_clear_current_path(ns))
kblockd_schedule_work(&ns->head->requeue_work);
- up_read(&ctrl->namespaces_rwsem);
mutex_unlock(&ctrl->scan_lock);
}
@@ -495,6 +493,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
unsigned *nr_change_groups = data;
struct nvme_ns *ns;
+ unsigned long idx;
dev_dbg(ctrl->device, "ANA group %d: %s.\n",
le32_to_cpu(desc->grpid),
@@ -506,8 +505,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
if (!nr_nsids)
return 0;
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
+ xa_for_each(&ctrl->namespaces, idx, ns) {
unsigned nsid = le32_to_cpu(desc->nsids[n]);
if (ns->head->ns_id < nsid)
@@ -517,7 +515,6 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
if (++n == nr_nsids)
break;
}
- up_read(&ctrl->namespaces_rwsem);
return 0;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 13ca90bcd352..d1b9761aff9c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -212,8 +212,7 @@ struct nvme_ctrl {
int numa_node;
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
- struct list_head namespaces;
- struct rw_semaphore namespaces_rwsem;
+ struct xarray namespaces;
struct device ctrl_device;
struct device *device; /* char device */
struct cdev cdev;
@@ -388,8 +387,6 @@ enum nvme_ns_features {
};
struct nvme_ns {
- struct list_head list;
-
struct nvme_ctrl *ctrl;
struct request_queue *queue;
struct gendisk *disk;
--
2.26.0