[PATCH V3] nvmet: add simple file backed ns support
Chaitanya Kulkarni
ckulkarnilinux at gmail.com
Wed May 9 20:59:39 PDT 2018
Thanks for the comments Sagi, I'll update and send the patch.

Regarding call_[write|read]_iter(): I removed those in this version since
that was one of the review comments on V1.

Also, with bvec_alloc() we have to maintain a mempool_t; can we avoid
adding more members when we can get away with kmalloc_array()? Do you
see any performance advantage/difference between bvec_alloc() and
kmalloc_array()?
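
For reference, this is roughly the extra state we would have to create
and tear down if we went the bvec_alloc()/mempool route (untested sketch;
all names and sizes below are made up for illustration):

/* hypothetical: fixed-size bvec arrays served from a dedicated slab +
 * mempool instead of a per-request kmalloc_array()
 */
#define NVMET_MPOOL_BVEC	64	/* placeholder: bvecs per array */
#define NVMET_MPOOL_MIN_OBJ	16	/* placeholder: reserved objects */

static struct kmem_cache *nvmet_bvec_cache;
static mempool_t *nvmet_bvec_pool;

static int nvmet_bvec_pool_init(void)
{
	nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MPOOL_BVEC * sizeof(struct bio_vec),
			0, SLAB_HWCACHE_ALIGN, NULL);
	if (!nvmet_bvec_cache)
		return -ENOMEM;

	nvmet_bvec_pool = mempool_create(NVMET_MPOOL_MIN_OBJ,
			mempool_alloc_slab, mempool_free_slab,
			nvmet_bvec_cache);
	if (!nvmet_bvec_pool) {
		kmem_cache_destroy(nvmet_bvec_cache);
		return -ENOMEM;
	}
	return 0;
}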
On Wed, May 9, 2018 at 7:53 AM, Sagi Grimberg <sagi at grimberg.me> wrote:
> Chaitanya, this looks good. Couple of comments below:
>
>> This patch adds simple file backed namespace support for
>> NVMeOF target.
>>
>> In current implementation NVMeOF supports block
>> device backed namespaces on the target side.
>> With this implementation regular file(s) can be used to
>> initialize the namespace(s) via target side configfs
>> interface.
>>
>> For admin smart log command on the target side, we only use
>> block device backed namespaces to compute target
>> side smart log.
>>
>> We isolate the code for each ns handle type
>> (file/block device) and add wrapper routines to manipulate
>> the respective ns handle types.
>>
>> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
>
>
> ...
>
>> +static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
>> +{
>> +	if (ns->bdev) {
>> +		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
>> +		ns->bdev = NULL;
>> +	} else if (ns->file) {
>> +		fput(ns->file);
>> +		ns->file = NULL;
>> +	} else
>> +		pr_err("invalid ns handle\n");
>
>
> WARN_ON_ONCE()
>
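Ok, will switch the else branch to a WARN_ON_ONCE() in the next version,
i.e. something like:

	} else {
		/* neither bdev nor file is set: should never happen */
		WARN_ON_ONCE(1);
	}
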
>> @@ -513,6 +574,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
>>  	req->transfer_len = 0;
>>  	req->rsp->status = 0;
>>  	req->ns = NULL;
>> +	req->bvec = NULL;
>> +	memset(&req->iocb, 0, sizeof(req->iocb));
>
>
> Can we clear this just for file IO?
>
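We could, yes, e.g. by moving it to the top of nvmet_execute_rw_file() so
the block-device path doesn't pay for it (sketch):

	/* the kiocb is only used by the file backend, so clear it
	 * here instead of for every request in nvmet_req_init()
	 */
	memset(&req->iocb, 0, sizeof(req->iocb));
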
>> +static void nvmet_execute_rw_file(struct nvmet_req *req)
>> +{
>> +	loff_t pos;
>> +	ssize_t len = 0, ret;
>> +	int bv_cnt = 0;
>> +	struct iov_iter iter;
>> +	struct sg_page_iter sg_pg_iter;
>> +	int rw = req->cmd->rw.opcode == nvme_cmd_write ? WRITE : READ;
>> +	u32 nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
>
>
> Not a must, but some people (including me) prefer an upside-down christmas
> tree arrangement.
>
>> +
>> +	req->bvec = req->inline_bvec;
>> +	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) {
>> +		req->bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
>> +				GFP_KERNEL);
>
>
> bvec_alloc()?
>
>
>> +		if (!req->bvec)
>> +			goto out;
>> +	}
>> +
>> +	for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
>> +		req->bvec[bv_cnt].bv_page = sg_page_iter_page(&sg_pg_iter);
>> +		req->bvec[bv_cnt].bv_offset = sg_pg_iter.sg->offset;
>> +		req->bvec[bv_cnt].bv_len = PAGE_SIZE - sg_pg_iter.sg->offset;
>> +		len += req->bvec[bv_cnt].bv_len;
>> +		bv_cnt++;
>> +	}
>> +
>> +	if (len != req->data_len)
>> +		goto free;
>> +
>> +	iov_iter_bvec(&iter, ITER_BVEC | rw, req->bvec, bv_cnt, len);
>> +	pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
>> +	req->iocb.ki_pos = pos;
>> +	req->iocb.ki_filp = req->ns->file;
>> +	req->iocb.ki_flags = IOCB_DIRECT;
>> +	req->iocb.ki_complete = nvmet_file_io_complete;
>> +
>> +	if (req->cmd->rw.opcode == nvme_cmd_write) {
>> +		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
>> +			req->iocb.ki_flags |= IOCB_DSYNC;
>> +		ret = req->ns->file->f_op->write_iter(&req->iocb, &iter);
>
>
> call_write_iter()?
>
>> +	} else
>> +		ret = req->ns->file->f_op->read_iter(&req->iocb, &iter);
>
>
> call_read_iter()?
>
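For reference, the helpers in include/linux/fs.h just wrap the f_op
dereference, so with them the two calls above would become:

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			req->iocb.ki_flags |= IOCB_DSYNC;
		ret = call_write_iter(req->ns->file, &req->iocb, &iter);
	} else {
		ret = call_read_iter(req->ns->file, &req->iocb, &iter);
	}
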
>> +
>> +	if (ret != -EIOCBQUEUED)
>> +		nvmet_file_io_complete(&req->iocb, ret, 0);
>> +	return;
>> +free:
>> +	kfree(req->bvec);
>> +out:
>> +	nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
>> +}
>> +
>>  static void nvmet_execute_flush(struct nvmet_req *req)
>>  {
>>  	struct bio *bio = &req->inline_bio;
>> @@ -102,6 +166,23 @@ static void nvmet_execute_flush(struct nvmet_req *req)
>>  	submit_bio(bio);
>>  }
>> +static void nvmet_flush_work_file(struct work_struct *work)
>> +{
>> +	int ret;
>> +	struct nvmet_req *req;
>> +
>> +	req = container_of(work, struct nvmet_req, flush_work);
>> +
>> +	ret = vfs_fsync(req->ns->file, 1);
>> +
>> +	nvmet_req_complete(req, ret);
>
>
> We need nvme status code here.
>
>
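Right, will fix; something along these lines, mirroring the other file
handlers in this patch:

	ret = vfs_fsync(req->ns->file, 1);
	/* translate the errno into an NVMe status code */
	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
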
>> +}
>> +
>> +static void nvmet_execute_flush_file(struct nvmet_req *req)
>> +{
>> +	schedule_work(&req->flush_work);
>> +}
>> +
>>  static u16 nvmet_discard_range(struct nvmet_ns *ns,
>>  		struct nvme_dsm_range *range, struct bio **bio)
>>  {
>> @@ -163,6 +244,43 @@ static void nvmet_execute_dsm(struct nvmet_req *req)
>>  	}
>>  }
>> +static void nvmet_execute_discard_file(struct nvmet_req *req)
>> +{
>> +	struct nvme_dsm_range range;
>> +	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
>> +	loff_t offset;
>> +	loff_t len;
>> +	int i, ret;
>> +
>> +	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
>> +		if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
>> +				sizeof(range)))
>> +			break;
>> +		offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
>> +		len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
>> +		ret = vfs_fallocate(req->ns->file, mode, offset, len);
>> +		if (ret)
>> +			break;
>> +	}
>> +
>> +	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
>> +}
>> +
>> +static void nvmet_execute_dsm_file(struct nvmet_req *req)
>> +{
>> +	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
>> +	case NVME_DSMGMT_AD:
>> +		nvmet_execute_discard_file(req);
>> +		return;
>> +	case NVME_DSMGMT_IDR:
>> +	case NVME_DSMGMT_IDW:
>> +	default:
>> +		/* Not supported yet */
>> +		nvmet_req_complete(req, 0);
>> +		return;
>> +	}
>> +}
>> +
>>  static void nvmet_execute_write_zeroes(struct nvmet_req *req)
>>  {
>>  	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
>> @@ -189,6 +307,22 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
>>  	}
>>  }
>> +static void nvmet_execute_write_zeroes_file(struct nvmet_req *req)
>> +{
>> +	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
>> +	loff_t offset;
>> +	loff_t len;
>> +	int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
>> +	int ret;
>> +
>> +	offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
>> +	len = (((sector_t)le32_to_cpu(write_zeroes->length) + 1) <<
>> +			req->ns->blksize_shift);
>> +
>> +	ret = vfs_fallocate(req->ns->file, mode, offset, len);
>> +	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
>> +}
>> +
>
>
> All of the above should be async, I think.
>
>
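One option would be to reuse the same work-item pattern as the flush path;
a rough, untested sketch for write zeroes (function names are made up, and
the flush_work member would probably want to become a generic work item):

	static void nvmet_write_zeroes_work_file(struct work_struct *work)
	{
		struct nvmet_req *req =
			container_of(work, struct nvmet_req, flush_work);

		/* runs in process context, where vfs_fallocate() may sleep */
		nvmet_execute_write_zeroes_file(req);
	}

	static void nvmet_execute_write_zeroes_file_async(struct nvmet_req *req)
	{
		INIT_WORK(&req->flush_work, nvmet_write_zeroes_work_file);
		schedule_work(&req->flush_work);
	}
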
>>  u16 nvmet_parse_io_cmd(struct nvmet_req *req)
>>  {
>>  	struct nvme_command *cmd = req->cmd;
>> @@ -207,20 +341,33 @@ u16 nvmet_parse_io_cmd(struct nvmet_req *req)
>>  	switch (cmd->common.opcode) {
>>  	case nvme_cmd_read:
>>  	case nvme_cmd_write:
>> -		req->execute = nvmet_execute_rw;
>> +		if (req->ns->file)
>> +			req->execute = nvmet_execute_rw_file;
>> +		else
>> +			req->execute = nvmet_execute_rw;
>>  		req->data_len = nvmet_rw_len(req);
>>  		return 0;
>>  	case nvme_cmd_flush:
>> -		req->execute = nvmet_execute_flush;
>> +		if (req->ns->file) {
>> +			req->execute = nvmet_execute_flush_file;
>> +			INIT_WORK(&req->flush_work, nvmet_flush_work_file);
>> +		} else
>> +			req->execute = nvmet_execute_flush;
>>  		req->data_len = 0;
>>  		return 0;
>>  	case nvme_cmd_dsm:
>> -		req->execute = nvmet_execute_dsm;
>> +		if (req->ns->file)
>> +			req->execute = nvmet_execute_dsm_file;
>> +		else
>> +			req->execute = nvmet_execute_dsm;
>>  		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
>>  			sizeof(struct nvme_dsm_range);
>>  		return 0;
>>  	case nvme_cmd_write_zeroes:
>> -		req->execute = nvmet_execute_write_zeroes;
>> +		if (req->ns->file)
>> +			req->execute = nvmet_execute_write_zeroes_file;
>> +		else
>> +			req->execute = nvmet_execute_write_zeroes;
>>  		return 0;
>>  	default:
>>  		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
>
>
> I support separating parse too.
>
>
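Ok, I'll look at splitting the parse path per backend as well; a possible
shape (nvmet_file_parse_io_cmd()/nvmet_bdev_parse_io_cmd() are
hypothetical names for the split-out routines):

	u16 nvmet_parse_io_cmd(struct nvmet_req *req)
	{
		u16 ret;

		ret = nvmet_check_ctrl_status(req, req->cmd);
		if (unlikely(ret))
			return ret;

		req->ns = nvmet_find_namespace(req->sq->ctrl,
				req->cmd->rw.nsid);
		if (unlikely(!req->ns))
			return NVME_SC_INVALID_NS | NVME_SC_DNR;

		/* dispatch on the ns handle type; each backend sets up
		 * its own req->execute and req->data_len
		 */
		if (req->ns->file)
			return nvmet_file_parse_io_cmd(req);

		return nvmet_bdev_parse_io_cmd(req);
	}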