[PATCH 06/47] blk-mq: add a flags parameter to blk_mq_alloc_request
Christoph Hellwig
hch at lst.de
Fri Nov 20 23:19:46 PST 2015
We already have the reserved flag, and a nowait flag awkwardly encoded as
a gfp_t. Add a real flags argument to make the scheme more extensible and
allow for a nicer calling convention.
Signed-off-by: Christoph Hellwig <hch at lst.de>
---
block/blk-core.c | 11 +-
block/blk-mq-tag.c | 11 +-
block/blk-mq.c | 20 +-
block/blk-mq.h | 11 +-
block/blk.h | 2 +-
drivers/block/mtip32xx/mtip32xx.c | 2 +-
drivers/nvme/host/core.c | 1172 +++++++++++++++++++++++++++++++++++++
drivers/nvme/host/pci.c | 11 +-
include/linux/blk-mq.h | 8 +-
9 files changed, 1210 insertions(+), 38 deletions(-)
create mode 100644 drivers/nvme/host/core.c
diff --git a/block/blk-core.c b/block/blk-core.c
index af9c315..d2100aa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -630,7 +630,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
}
EXPORT_SYMBOL(blk_alloc_queue);
-int blk_queue_enter(struct request_queue *q, gfp_t gfp)
+int blk_queue_enter(struct request_queue *q, bool nowait)
{
while (true) {
int ret;
@@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
- if (!gfpflags_allow_blocking(gfp))
+ if (nowait)
return -EBUSY;
ret = wait_event_interruptible(q->mq_freeze_wq,
@@ -1284,7 +1284,9 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
if (q->mq_ops)
- return blk_mq_alloc_request(q, rw, gfp_mask, false);
+ return blk_mq_alloc_request(q, rw,
+ (gfp_mask & __GFP_DIRECT_RECLAIM) ?
+ 0 : BLK_MQ_REQ_NOWAIT);
else
return blk_old_get_request(q, rw, gfp_mask);
}
@@ -2052,8 +2054,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
-
+ if (likely(blk_queue_enter(q, false) == 0)) {
ret = q->make_request_fn(q, bio);
blk_queue_exit(q);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a07ca34..abdbb47 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
if (tag != -1)
return tag;
- if (!gfpflags_allow_blocking(data->gfp))
+ if (data->flags & BLK_MQ_REQ_NOWAIT)
return -1;
bs = bt_wait_ptr(bt, hctx);
@@ -303,7 +303,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
data->ctx = blk_mq_get_ctx(data->q);
data->hctx = data->q->mq_ops->map_queue(data->q,
data->ctx->cpu);
- if (data->reserved) {
+ if (data->flags & BLK_MQ_REQ_RESERVED) {
bt = &data->hctx->tags->breserved_tags;
} else {
last_tag = &data->ctx->last_tag;
@@ -349,10 +349,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
- if (!data->reserved)
- return __blk_mq_get_tag(data);
-
- return __blk_mq_get_reserved_tag(data);
+ if (data->flags & BLK_MQ_REQ_RESERVED)
+ return __blk_mq_get_reserved_tag(data);
+ return __blk_mq_get_tag(data);
}
static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c932605..6da03f1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -230,8 +230,8 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
return NULL;
}
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
- bool reserved)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+ unsigned int flags)
{
struct blk_mq_ctx *ctx;
struct blk_mq_hw_ctx *hctx;
@@ -239,24 +239,22 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
struct blk_mq_alloc_data alloc_data;
int ret;
- ret = blk_queue_enter(q, gfp);
+ ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
if (ret)
return ERR_PTR(ret);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
- reserved, ctx, hctx);
+ blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
- if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
+ if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
- hctx);
+ blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
ctx = alloc_data.ctx;
}
@@ -1181,8 +1179,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
rw |= REQ_SYNC;
trace_block_getrq(q, bio, rw);
- blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
- hctx);
+ blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
if (unlikely(!rq)) {
__blk_mq_run_hw_queue(hctx);
@@ -1191,8 +1188,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- blk_mq_set_alloc_data(&alloc_data, q,
- __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
+ blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
ctx = alloc_data.ctx;
hctx = alloc_data.hctx;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 713820b..eaede8e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -96,8 +96,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
struct blk_mq_alloc_data {
/* input parameter */
struct request_queue *q;
- gfp_t gfp;
- bool reserved;
+ unsigned int flags;
/* input & output parameter */
struct blk_mq_ctx *ctx;
@@ -105,13 +104,11 @@ struct blk_mq_alloc_data {
};
static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
- struct request_queue *q, gfp_t gfp, bool reserved,
- struct blk_mq_ctx *ctx,
- struct blk_mq_hw_ctx *hctx)
+ struct request_queue *q, unsigned int flags,
+ struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
{
data->q = q;
- data->gfp = gfp;
- data->reserved = reserved;
+ data->flags = flags;
data->ctx = ctx;
data->hctx = hctx;
}
diff --git a/block/blk.h b/block/blk.h
index 1d95107..38bf997 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -72,7 +72,7 @@ void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
bool __blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes);
-int blk_queue_enter(struct request_queue *q, gfp_t gfp);
+int blk_queue_enter(struct request_queue *q, bool nowait);
void blk_queue_exit(struct request_queue *q);
void blk_freeze_queue(struct request_queue *q);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a28a562..cf3b51a 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
{
struct request *rq;
- rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
+ rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED);
return blk_mq_rq_to_pdu(rq);
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
new file mode 100644
index 0000000..53cf507
--- /dev/null
+++ b/drivers/nvme/host/core.c
@@ -0,0 +1,1172 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/pr.h>
+#include <linux/ptrace.h>
+#include <linux/nvme_ioctl.h>
+#include <linux/t10-pi.h>
+#include <scsi/sg.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+#define NVME_MINORS (1U << MINORBITS)
+
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+static int nvme_char_major;
+module_param(nvme_char_major, int, 0);
+
+static LIST_HEAD(nvme_ctrl_list);
+DEFINE_SPINLOCK(dev_list_lock);
+
+static struct class *nvme_class;
+
+static void nvme_free_ns(struct kref *kref)
+{
+ struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+ if (ns->type == NVME_NS_LIGHTNVM)
+ nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
+
+ spin_lock(&dev_list_lock);
+ ns->disk->private_data = NULL;
+ spin_unlock(&dev_list_lock);
+
+ nvme_put_ctrl(ns->ctrl);
+ put_disk(ns->disk);
+ kfree(ns);
+}
+
+static void nvme_put_ns(struct nvme_ns *ns)
+{
+ kref_put(&ns->kref, nvme_free_ns);
+}
+
+static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
+{
+ struct nvme_ns *ns;
+
+ spin_lock(&dev_list_lock);
+ ns = disk->private_data;
+ if (ns && !kref_get_unless_zero(&ns->kref))
+ ns = NULL;
+ spin_unlock(&dev_list_lock);
+
+ return ns;
+}
+
+static struct request *nvme_alloc_request(struct request_queue *q,
+ struct nvme_command *cmd)
+{
+ bool write = cmd->common.opcode & 1;
+ struct request *req;
+
+ req = blk_mq_alloc_request(q, write, 0);
+ if (IS_ERR(req))
+ return req;
+
+ req->cmd_type = REQ_TYPE_DRV_PRIV;
+ req->cmd_flags |= REQ_FAILFAST_DRIVER;
+ req->__data_len = 0;
+ req->__sector = (sector_t) -1;
+ req->bio = req->biotail = NULL;
+
+ req->cmd = (unsigned char *)cmd;
+ req->cmd_len = sizeof(struct nvme_command);
+ req->special = (void *)0;
+
+ return req;
+}
+
+/*
+ * Returns 0 on success. If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+{
+ struct request *req;
+ int ret;
+
+ req = nvme_alloc_request(q, cmd);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+ if (buffer && bufflen) {
+ ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
+ if (ret)
+ goto out;
+ }
+
+ blk_execute_rq(req->q, NULL, req, 0);
+ if (result)
+ *result = (u32)(uintptr_t)req->special;
+ ret = req->errors;
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen)
+{
+ return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+}
+
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen,
+ void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+ u32 *result, unsigned timeout)
+{
+ bool write = cmd->common.opcode & 1;
+ struct nvme_ns *ns = q->queuedata;
+ struct gendisk *disk = ns ? ns->disk : NULL;
+ struct request *req;
+ struct bio *bio = NULL;
+ void *meta = NULL;
+ int ret;
+
+ req = nvme_alloc_request(q, cmd);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+ if (ubuffer && bufflen) {
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+ GFP_KERNEL);
+ if (ret)
+ goto out;
+ bio = req->bio;
+
+ if (!disk)
+ goto submit;
+ bio->bi_bdev = bdget_disk(disk, 0);
+ if (!bio->bi_bdev) {
+ ret = -ENODEV;
+ goto out_unmap;
+ }
+
+ if (meta_buffer) {
+ struct bio_integrity_payload *bip;
+
+ meta = kmalloc(meta_len, GFP_KERNEL);
+ if (!meta) {
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ if (write) {
+ if (copy_from_user(meta, meta_buffer,
+ meta_len)) {
+ ret = -EFAULT;
+ goto out_free_meta;
+ }
+ }
+
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+ if (!bip) {
+ ret = -ENOMEM;
+ goto out_free_meta;
+ }
+
+ bip->bip_iter.bi_size = meta_len;
+ bip->bip_iter.bi_sector = meta_seed;
+
+ ret = bio_integrity_add_page(bio, virt_to_page(meta),
+ meta_len, offset_in_page(meta));
+ if (ret != meta_len) {
+ ret = -ENOMEM;
+ goto out_free_meta;
+ }
+ }
+ }
+ submit:
+ blk_execute_rq(req->q, disk, req, 0);
+ ret = req->errors;
+ if (result)
+ *result = (u32)(uintptr_t)req->special;
+ if (meta && !ret && !write) {
+ if (copy_to_user(meta_buffer, meta, meta_len))
+ ret = -EFAULT;
+ }
+ out_free_meta:
+ kfree(meta);
+ out_unmap:
+ if (bio) {
+ if (disk && bio->bi_bdev)
+ bdput(bio->bi_bdev);
+ blk_rq_unmap_user(bio);
+ }
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen, u32 *result,
+ unsigned timeout)
+{
+ return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
+ result, timeout);
+}
+
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+{
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = cpu_to_le32(1);
+
+ *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ctrl));
+ if (error)
+ kfree(*id);
+ return error;
+}
+
+static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
+{
+ struct nvme_command c = { };
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = cpu_to_le32(2);
+ c.identify.nsid = cpu_to_le32(nsid);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
+}
+
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+ struct nvme_id_ns **id)
+{
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify,
+ c.identify.nsid = cpu_to_le32(nsid),
+
+ *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ns));
+ if (error)
+ kfree(*id);
+ return error;
+}
+
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
+ dma_addr_t dma_addr, u32 *result)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_get_features;
+ c.features.nsid = cpu_to_le32(nsid);
+ c.features.prp1 = cpu_to_le64(dma_addr);
+ c.features.fid = cpu_to_le32(fid);
+
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+}
+
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+ dma_addr_t dma_addr, u32 *result)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.prp1 = cpu_to_le64(dma_addr);
+ c.features.fid = cpu_to_le32(fid);
+ c.features.dword11 = cpu_to_le32(dword11);
+
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+}
+
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
+{
+ struct nvme_command c = { };
+ int error;
+
+ c.common.opcode = nvme_admin_get_log_page,
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+ c.common.cdw10[0] = cpu_to_le32(
+ (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+ NVME_LOG_SMART),
+
+ *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+ if (!*log)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+ sizeof(struct nvme_smart_log));
+ if (error)
+ kfree(*log);
+ return error;
+}
+
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+ struct nvme_user_io io;
+ struct nvme_command c;
+ unsigned length, meta_len;
+ void __user *metadata;
+
+ if (copy_from_user(&io, uio, sizeof(io)))
+ return -EFAULT;
+
+ switch (io.opcode) {
+ case nvme_cmd_write:
+ case nvme_cmd_read:
+ case nvme_cmd_compare:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ length = (io.nblocks + 1) << ns->lba_shift;
+ meta_len = (io.nblocks + 1) * ns->ms;
+ metadata = (void __user *)(uintptr_t)io.metadata;
+
+ if (ns->ext) {
+ length += meta_len;
+ meta_len = 0;
+ } else if (meta_len) {
+ if ((io.metadata & 3) || !io.metadata)
+ return -EINVAL;
+ }
+
+ memset(&c, 0, sizeof(c));
+ c.rw.opcode = io.opcode;
+ c.rw.flags = io.flags;
+ c.rw.nsid = cpu_to_le32(ns->ns_id);
+ c.rw.slba = cpu_to_le64(io.slba);
+ c.rw.length = cpu_to_le16(io.nblocks);
+ c.rw.control = cpu_to_le16(io.control);
+ c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
+ c.rw.reftag = cpu_to_le32(io.reftag);
+ c.rw.apptag = cpu_to_le16(io.apptag);
+ c.rw.appmask = cpu_to_le16(io.appmask);
+
+ return __nvme_submit_user_cmd(ns->queue, &c,
+ (void __user *)(uintptr_t)io.addr, length,
+ metadata, meta_len, io.slba, NULL, 0);
+}
+
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_passthru_cmd __user *ucmd)
+{
+ struct nvme_passthru_cmd cmd;
+ struct nvme_command c;
+ unsigned timeout = 0;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = cmd.opcode;
+ c.common.flags = cmd.flags;
+ c.common.nsid = cpu_to_le32(cmd.nsid);
+ c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+ c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+ c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+ c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+ c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+ c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+ c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+ status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+ (void __user *)cmd.addr, cmd.data_len,
+ &cmd.result, timeout);
+ if (status >= 0) {
+ if (put_user(cmd.result, &ucmd->result))
+ return -EFAULT;
+ }
+
+ return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+ switch (cmd) {
+ case NVME_IOCTL_ID:
+ force_successful_syscall_return();
+ return ns->ns_id;
+ case NVME_IOCTL_ADMIN_CMD:
+ return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+ case NVME_IOCTL_IO_CMD:
+ return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+ case NVME_IOCTL_SUBMIT_IO:
+ return nvme_submit_io(ns, (void __user *)arg);
+ case SG_GET_VERSION_NUM:
+ return nvme_sg_get_version_num((void __user *)arg);
+ case SG_IO:
+ return nvme_sg_io(ns, (void __user *)arg);
+ default:
+ return -ENOTTY;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case SG_IO:
+ return -ENOIOCTLCMD;
+ }
+ return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl NULL
+#endif
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+ return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+}
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+ nvme_put_ns(disk->private_data);
+}
+
+static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ /* some standard values */
+ geo->heads = 1 << 6;
+ geo->sectors = 1 << 5;
+ geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
+ return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+ struct blk_integrity integrity;
+
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE3:
+ integrity.profile = &t10_pi_type3_crc;
+ break;
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ integrity.profile = &t10_pi_type1_crc;
+ break;
+ default:
+ integrity.profile = NULL;
+ break;
+ }
+ integrity.tuple_size = ns->ms;
+ blk_integrity_register(ns->disk, &integrity);
+ blk_queue_max_integrity_segments(ns->queue, 1);
+}
+#else
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+ u32 logical_block_size = queue_logical_block_size(ns->queue);
+ ns->queue->limits.discard_zeroes_data = 0;
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+}
+
+static int nvme_revalidate_disk(struct gendisk *disk)
+{
+ struct nvme_ns *ns = disk->private_data;
+ struct nvme_id_ns *id;
+ u8 lbaf, pi_type;
+ u16 old_ms;
+ unsigned short bs;
+
+ if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
+ dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
+ __func__, ns->ctrl->instance, ns->ns_id);
+ return -ENODEV;
+ }
+ if (id->ncap == 0) {
+ kfree(id);
+ return -ENODEV;
+ }
+
+ if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
+ if (nvme_nvm_register(ns->queue, disk->disk_name)) {
+ dev_warn(ns->ctrl->dev,
+ "%s: LightNVM init failure\n", __func__);
+ kfree(id);
+ return -ENODEV;
+ }
+ ns->type = NVME_NS_LIGHTNVM;
+ }
+
+ old_ms = ns->ms;
+ lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
+ ns->lba_shift = id->lbaf[lbaf].ds;
+ ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+ /*
+ * If identify namespace failed, use default 512 byte block size so
+ * block layer can use before failing read/write for 0 capacity.
+ */
+ if (ns->lba_shift == 0)
+ ns->lba_shift = 9;
+ bs = 1 << ns->lba_shift;
+ /* XXX: PI implementation requires metadata equal t10 pi tuple size */
+ pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+ id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+ blk_mq_freeze_queue(disk->queue);
+ if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
+ ns->ms != old_ms ||
+ bs != queue_logical_block_size(disk->queue) ||
+ (ns->ms && ns->ext)))
+ blk_integrity_unregister(disk);
+
+ ns->pi_type = pi_type;
+ blk_queue_logical_block_size(ns->queue, bs);
+
+ if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
+ nvme_init_integrity(ns);
+ if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
+ set_capacity(disk, 0);
+ else
+ set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+ if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ nvme_config_discard(ns);
+ blk_mq_unfreeze_queue(disk->queue);
+
+ kfree(id);
+ return 0;
+}
+
+static char nvme_pr_type(enum pr_type type)
+{
+ switch (type) {
+ case PR_WRITE_EXCLUSIVE:
+ return 1;
+ case PR_EXCLUSIVE_ACCESS:
+ return 2;
+ case PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return 3;
+ case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return 4;
+ case PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return 5;
+ case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return 6;
+ default:
+ return 0;
+ }
+};
+
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+ u64 key, u64 sa_key, u8 op)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+ struct nvme_command c;
+ u8 data[16] = { 0, };
+
+ put_unaligned_le64(key, &data[0]);
+ put_unaligned_le64(sa_key, &data[8]);
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = op;
+ c.common.nsid = cpu_to_le32(ns->ns_id);
+ c.common.cdw10[0] = cpu_to_le32(cdw10);
+
+ return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+ u64 new, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = old ? 2 : 0;
+ cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
+ cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+ enum pr_type type, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = nvme_pr_type(type) << 8;
+ cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+ enum pr_type type, bool abort)
+{
+ u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+ u32 cdw10 = 1 | key ? 1 << 3 : 0;
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static const struct pr_ops nvme_pr_ops = {
+ .pr_register = nvme_pr_register,
+ .pr_reserve = nvme_pr_reserve,
+ .pr_release = nvme_pr_release,
+ .pr_preempt = nvme_pr_preempt,
+ .pr_clear = nvme_pr_clear,
+};
+
+static const struct block_device_operations nvme_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
+ .open = nvme_open,
+ .release = nvme_release,
+ .getgeo = nvme_getgeo,
+ .revalidate_disk= nvme_revalidate_disk,
+ .pr_ops = &nvme_pr_ops,
+};
+
+/*
+ * Initialize the cached copies of the Identify data and various controller
+ * register in our nvme_ctrl structure. This should be called as soon as
+ * the admin queue is fully up and running.
+ */
+int nvme_init_identify(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ u64 cap;
+ int ret, page_shift;
+
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+ if (ret) {
+ dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+ return ret;
+ }
+
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
+ if (ret) {
+ dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+ return ret;
+ }
+ page_shift = NVME_CAP_MPSMIN(cap) + 12;
+ ctrl->page_size = 1 << page_shift;
+
+ if (ctrl->vs >= NVME_VS(1, 1))
+ ctrl->subsystem = NVME_CAP_NSSRC(cap);
+
+ ret = nvme_identify_ctrl(ctrl, &id);
+ if (ret) {
+ dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+ return -EIO;
+ }
+
+ ctrl->oncs = le16_to_cpup(&id->oncs);
+ atomic_set(&ctrl->abort_limit, id->acl + 1);
+ ctrl->vwc = id->vwc;
+ memcpy(ctrl->serial, id->sn, sizeof(id->sn));
+ memcpy(ctrl->model, id->mn, sizeof(id->mn));
+ memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
+ if (id->mdts)
+ ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
+ else
+ ctrl->max_hw_sectors = UINT_MAX;
+
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
+ unsigned int max_hw_sectors;
+
+ ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
+ max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
+ if (ctrl->max_hw_sectors) {
+ ctrl->max_hw_sectors = min(max_hw_sectors,
+ ctrl->max_hw_sectors);
+ } else {
+ ctrl->max_hw_sectors = max_hw_sectors;
+ }
+ }
+
+ kfree(id);
+ return 0;
+}
+
+static int nvme_dev_open(struct inode *inode, struct file *file)
+{
+ struct nvme_ctrl *ctrl;
+ int instance = iminor(inode);
+ int ret = -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
+ if (ctrl->instance != instance)
+ continue;
+
+ if (!ctrl->admin_q) {
+ ret = -EWOULDBLOCK;
+ break;
+ }
+ if (!kref_get_unless_zero(&ctrl->kref))
+ break;
+ file->private_data = ctrl;
+ ret = 0;
+ break;
+ }
+ spin_unlock(&dev_list_lock);
+
+ return ret;
+}
+
+static int nvme_dev_release(struct inode *inode, struct file *file)
+{
+ nvme_put_ctrl(file->private_data);
+ return 0;
+}
+
+static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct nvme_ctrl *ctrl = file->private_data;
+ void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+
+ switch (cmd) {
+ case NVME_IOCTL_ADMIN_CMD:
+ return nvme_user_cmd(ctrl, NULL, argp);
+ case NVME_IOCTL_IO_CMD:
+ if (list_empty(&ctrl->namespaces))
+ return -ENOTTY;
+ ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+ return nvme_user_cmd(ctrl, ns, argp);
+ case NVME_IOCTL_RESET:
+ dev_warn(ctrl->dev, "resetting controller\n");
+ return ctrl->ops->reset_ctrl(ctrl);
+ case NVME_IOCTL_SUBSYS_RESET:
+ return nvme_reset_subsystem(ctrl);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations nvme_dev_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_dev_open,
+ .release = nvme_dev_release,
+ .unlocked_ioctl = nvme_dev_ioctl,
+ .compat_ioctl = nvme_dev_ioctl,
+};
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ int ret;
+
+ ret = ctrl->ops->reset_ctrl(ctrl);
+ if (ret < 0)
+ return ret;
+ return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+ struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+ return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ if (ns->ns_id == nsid)
+ return ns;
+ if (ns->ns_id > nsid)
+ break;
+ }
+ return NULL;
+}
+
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+ struct gendisk *disk;
+ int node = dev_to_node(ctrl->dev);
+
+ ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
+ if (!ns)
+ return;
+
+ ns->queue = blk_mq_init_queue(ctrl->tagset);
+ if (IS_ERR(ns->queue))
+ goto out_free_ns;
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+ ns->queue->queuedata = ns;
+ ns->ctrl = ctrl;
+
+ disk = alloc_disk_node(0, node);
+ if (!disk)
+ goto out_free_queue;
+
+ kref_init(&ns->kref);
+ ns->ns_id = nsid;
+ ns->disk = disk;
+ ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
+
+ blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+ if (ctrl->max_hw_sectors) {
+ blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
+ blk_queue_max_segments(ns->queue,
+ (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
+ }
+ if (ctrl->stripe_size)
+ blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
+ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+ blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+
+ disk->major = nvme_major;
+ disk->first_minor = 0;
+ disk->fops = &nvme_fops;
+ disk->private_data = ns;
+ disk->queue = ns->queue;
+ disk->driverfs_dev = ctrl->device;
+ disk->flags = GENHD_FL_EXT_DEVT;
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+
+ if (nvme_revalidate_disk(ns->disk))
+ goto out_free_disk;
+
+ list_add_tail(&ns->list, &ctrl->namespaces);
+ kref_get(&ctrl->kref);
+ if (ns->type != NVME_NS_LIGHTNVM)
+ add_disk(ns->disk);
+
+ return;
+ out_free_disk:
+ kfree(disk);
+ out_free_queue:
+ blk_cleanup_queue(ns->queue);
+ out_free_ns:
+ kfree(ns);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+ bool kill = nvme_io_incapable(ns->ctrl) &&
+ !blk_queue_dying(ns->queue);
+
+ if (kill)
+ blk_set_queue_dying(ns->queue);
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
+ del_gendisk(ns->disk);
+ }
+ if (kill || !blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_cleanup_queue(ns->queue);
+ }
+ list_del_init(&ns->list);
+ nvme_put_ns(ns);
+}
+
+static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+
+ ns = nvme_find_ns(ctrl, nsid);
+ if (ns) {
+ if (revalidate_disk(ns->disk))
+ nvme_ns_remove(ns);
+ } else
+ nvme_alloc_ns(ctrl, nsid);
+}
+
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ struct nvme_ns *ns;
+ __le32 *ns_list;
+ unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+ int ret = 0;
+
+ ns_list = kzalloc(0x1000, GFP_KERNEL);
+ if (!ns_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_lists; i++) {
+ ret = nvme_identify_ns_list(ctrl, prev, ns_list);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < min(nn, 1024U); j++) {
+ nsid = le32_to_cpu(ns_list[j]);
+ if (!nsid)
+ goto out;
+
+ nvme_validate_ns(ctrl, nsid);
+
+ while (++prev < nsid) {
+ ns = nvme_find_ns(ctrl, prev);
+ if (ns)
+ nvme_ns_remove(ns);
+ }
+ }
+ nn -= j;
+ }
+ out:
+ kfree(ns_list);
+ return ret;
+}
+
+static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ struct nvme_ns *ns, *next;
+ unsigned i;
+
+ for (i = 1; i <= nn; i++)
+ nvme_validate_ns(ctrl, i);
+
+ list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
+ if (ns->ns_id > nn)
+ nvme_ns_remove(ns);
+ }
+}
+
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ unsigned nn;
+
+ if (nvme_identify_ctrl(ctrl, &id))
+ return;
+
+ nn = le32_to_cpu(id->nn);
+ if (ctrl->vs >= NVME_VS(1, 1)) {
+ if (!nvme_scan_ns_list(ctrl, nn))
+ goto done;
+ }
+ __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
+ done:
+ list_sort(NULL, &ctrl->namespaces, ns_cmp);
+ kfree(id);
+}
+
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns, *next;
+
+ list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
+ nvme_ns_remove(ns);
+}
+
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_ctrl *ctrl)
+{
+ int instance, error;
+
+ do {
+ if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+ return -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ error = ida_get_new(&nvme_instance_ida, &instance);
+ spin_unlock(&dev_list_lock);
+ } while (error == -EAGAIN);
+
+ if (error)
+ return -ENODEV;
+
+ ctrl->instance = instance;
+ return 0;
+}
+
+static void nvme_release_instance(struct nvme_ctrl *ctrl)
+{
+ spin_lock(&dev_list_lock);
+ ida_remove(&nvme_instance_ida, ctrl->instance);
+ spin_unlock(&dev_list_lock);
+}
+
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+ {
+ device_remove_file(ctrl->device, &dev_attr_reset_controller);
+ device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
+
+ spin_lock(&dev_list_lock);
+ list_del(&ctrl->node);
+ spin_unlock(&dev_list_lock);
+}
+
+static void nvme_free_ctrl(struct kref *kref)
+{
+ struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+
+ put_device(ctrl->device);
+ nvme_release_instance(ctrl);
+
+ ctrl->ops->free_ctrl(ctrl);
+}
+
+void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+{
+ kref_put(&ctrl->kref, nvme_free_ctrl);
+}
+
+/*
+ * Initialize a NVMe controller structures. This needs to be called during
+ * earliest initialization so that we have the initialized structured around
+ * during probing.
+ */
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+ const struct nvme_ctrl_ops *ops, u16 vendor,
+ unsigned long quirks)
+{
+ int ret;
+
+ INIT_LIST_HEAD(&ctrl->namespaces);
+ kref_init(&ctrl->kref);
+ ctrl->dev = dev;
+ ctrl->ops = ops;
+ ctrl->vendor = vendor;
+ ctrl->quirks = quirks;
+
+ ret = nvme_set_instance(ctrl);
+ if (ret)
+ goto out;
+
+ ctrl->device = device_create(nvme_class, ctrl->dev,
+ MKDEV(nvme_char_major, ctrl->instance),
+ dev, "nvme%d", ctrl->instance);
+ if (IS_ERR(ctrl->device)) {
+ ret = PTR_ERR(ctrl->device);
+ goto out_release_instance;
+ }
+ get_device(ctrl->device);
+ dev_set_drvdata(ctrl->device, ctrl);
+
+ ret = device_create_file(ctrl->device, &dev_attr_reset_controller);
+ if (ret)
+ goto out_put_device;
+
+ spin_lock(&dev_list_lock);
+ list_add_tail(&ctrl->node, &nvme_ctrl_list);
+ spin_unlock(&dev_list_lock);
+
+ return 0;
+
+out_put_device:
+ put_device(ctrl->device);
+ device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
+out_release_instance:
+ nvme_release_instance(ctrl);
+out:
+ return ret;
+}
+
+int __init nvme_core_init(void)
+{
+ int result;
+
+ result = register_blkdev(nvme_major, "nvme");
+ if (result < 0)
+ return result;
+ else if (result > 0)
+ nvme_major = result;
+
+ result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
+ &nvme_dev_fops);
+ if (result < 0)
+ goto unregister_blkdev;
+ else if (result > 0)
+ nvme_char_major = result;
+
+ nvme_class = class_create(THIS_MODULE, "nvme");
+ if (IS_ERR(nvme_class)) {
+ result = PTR_ERR(nvme_class);
+ goto unregister_chrdev;
+ }
+
+ return 0;
+
+ unregister_chrdev:
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_blkdev:
+ unregister_blkdev(nvme_major, "nvme");
+ return result;
+}
+
+void nvme_core_exit(void)
+{
+ unregister_blkdev(nvme_major, "nvme");
+ class_destroy(nvme_class);
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9444884..5c5f455 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1040,7 +1040,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
struct request *req;
int ret;
- req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
+ req = blk_mq_alloc_request(q, write, 0);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1093,7 +1093,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
struct nvme_cmd_info *cmd_info;
struct request *req;
- req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
+ req = blk_mq_alloc_request(dev->admin_q, WRITE,
+ BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1118,7 +1119,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
struct request *req;
struct nvme_cmd_info *cmd_rq;
- req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
+ req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1319,8 +1320,8 @@ static void nvme_abort_req(struct request *req)
if (!dev->abort_limit)
return;
- abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC,
- false);
+ abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
+ BLK_MQ_REQ_NOWAIT);
if (IS_ERR(abort_req))
return;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index daf17d7..7fc9296 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -188,8 +188,14 @@ void blk_mq_insert_request(struct request *, bool, bool, bool);
void blk_mq_free_request(struct request *rq);
void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+
+enum {
+ BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
+ BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
+};
+
struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
- gfp_t gfp, bool reserved);
+ unsigned int flags);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
--
1.9.1
More information about the Linux-nvme
mailing list