[PATCH RFC] nvmet: Reference namespaces percpu
Sagi Grimberg
sagig at mellanox.com
Mon Nov 16 01:32:03 PST 2015
In order to avoid cross-cpu or cross-socket contention
on namespace reference counting in the hot path, use the
much more efficient percpu_ref.
Signed-off-by: Sagi Grimberg <sagig at mellanox.com>
---
FYI, with this patch applied nvme_loop reaches 95% of the
IOPS of raw null_blk. I'm also able to survive hot
namespace removal during stress IO.
drivers/nvme/target/core.c | 38 ++++++++++++++++++++++++++++++++------
drivers/nvme/target/nvmet.h | 4 +++-
2 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index dc99909..e2a8893 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -37,16 +37,18 @@ struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
rcu_read_lock();
ns = __nvmet_find_namespace(ctrl, nsid);
- if (ns && !kref_get_unless_zero(&ns->ref))
- ns = NULL;
+ if (ns)
+ percpu_ref_get(&ns->ref);
rcu_read_unlock();
return ns;
}
-static void nvmet_destroy_namespace(struct kref *ref)
+static void nvmet_free_ns(struct work_struct *work)
{
- struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
+ struct nvmet_ns *ns = container_of(work, struct nvmet_ns, work);
+
+ percpu_ref_exit(&ns->ref);
if (ns->bdev)
blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
@@ -54,9 +56,19 @@ static void nvmet_destroy_namespace(struct kref *ref)
kfree(ns);
}
+static void nvmet_destroy_namespace(struct percpu_ref *ref)
+{
+ struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
+
+ /* Now that we're off the ns list, we can safely kill percpu_ref */
+ percpu_ref_kill(&ns->ref);
+ INIT_WORK(&ns->work, nvmet_free_ns);
+ schedule_work(&ns->work);
+}
+
void nvmet_put_namespace(struct nvmet_ns *ns)
{
- kref_put(&ns->ref, nvmet_destroy_namespace);
+ percpu_ref_put(&ns->ref);
}
int nvmet_ns_enable(struct nvmet_ns *ns, const char *path)
@@ -109,22 +121,36 @@ void nvmet_ns_free(struct nvmet_ns *ns)
list_del_init(&ns->dev_link);
mutex_unlock(&subsys->lock);
+ /* Should be the final ref! */
nvmet_put_namespace(ns);
}
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
struct nvmet_ns *ns;
+ int ret;
ns = kzalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
return NULL;
INIT_LIST_HEAD(&ns->dev_link);
- kref_init(&ns->ref);
+ ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
+ 0, GFP_KERNEL);
+ if (ret)
+ goto free_ns;
+
+ percpu_ref_get(&ns->ref);
+
ns->nsid = nsid;
ns->subsys = subsys;
+
return ns;
+
+free_ns:
+ kfree(ns);
+
+ return NULL;
}
void nvmet_req_complete(struct nvmet_req *req, u16 status)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 3903d25..4d7a620 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/device.h>
#include <linux/kref.h>
+#include <linux/percpu-refcount.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/nvme.h>
@@ -13,7 +14,7 @@
struct nvmet_ns {
struct list_head dev_link;
- struct kref ref;
+ struct percpu_ref ref;
struct block_device *bdev;
u32 nsid;
u32 blksize_shift;
@@ -26,6 +27,7 @@ struct nvmet_ns {
struct config_group default_groups[2];
struct config_group group;
struct rcu_head rcu;
+ struct work_struct work;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
--
1.7.1
More information about the Linux-nvme
mailing list