[RFC PATCH 4/4] io_uring: add async passthrough ioctl support
Kanchan Joshi
joshi.k at samsung.com
Wed Jan 27 10:00:29 EST 2021
Introduce IORING_OP_IOCTL_PT for async ioctl. It bypasses the block
layer and goes directly to the underlying block driver that manages the
block device. This is done by calling the newly introduced "async_ioctl"
block-device operation.
The requested operation may complete synchronously, in which case the
CQE is posted right away. For asynchronous completion, the lower layer
calls the completion callback supplied by io_uring.
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g at samsung.com>
---
fs/io_uring.c | 77 +++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 7 +++-
2 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 985a9e3f976d..c15852dfb727 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -468,6 +468,19 @@ struct io_rw {
u64 len;
};
+/*
+ * Per-request state for IORING_OP_IOCTL_PT: the ioctl skips the block layer
+ * and reaches the block device driver via its async_ioctl() operation.
+ */
+struct io_pt_ioctl {
+ struct file *file;
+ /* arg and cmd like regular ioctl, copied from the SQE at prep time */
+ u64 arg;
+ u32 cmd;
+ /* defined by block layer; holds the task and completion callback */
+ struct pt_ioctl_ctx ioctx;
+};
+
struct io_connect {
struct file *file;
struct sockaddr __user *addr;
@@ -699,6 +712,7 @@ struct io_kiocb {
struct io_shutdown shutdown;
struct io_rename rename;
struct io_unlink unlink;
+ struct io_pt_ioctl ptioctl;
/* use only after cleaning per-op data, see io_clean_op() */
struct io_completion compl;
};
@@ -824,6 +838,10 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.work_flags = IO_WQ_WORK_BLKCG,
},
+ [IORING_OP_IOCTL_PT] = {
+ .needs_file = 1,
+ .work_flags = IO_WQ_WORK_MM,
+ },
[IORING_OP_READ_FIXED] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
@@ -3704,6 +3722,60 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
return ret;
}
+/*
+ * Prep IORING_OP_IOCTL_PT: copy cmd/arg out of the SQE. Returns -EOPNOTSUPP
+ * unless req->file is a block device whose driver provides async_ioctl().
+ */
+static int io_pt_ioctl_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	struct gendisk *disk;
+
+	/* I_BDEV() below is only valid for a block-device inode */
+	if (!S_ISBLK(file_inode(req->file)->i_mode))
+		return -EOPNOTSUPP;
+	disk = I_BDEV(req->file->f_mapping->host)->bd_disk;
+	if (!disk || !disk->fops || !disk->fops->async_ioctl)
+		return -EOPNOTSUPP;
+	/* for sqpoll, use sqo_task */
+	req->ptioctl.ioctx.task = (req->ctx->flags & IORING_SETUP_SQPOLL) ?
+					req->ctx->sqo_task : current;
+	req->ptioctl.arg = READ_ONCE(sqe->ioctl_arg);
+	req->ptioctl.cmd = READ_ONCE(sqe->ioctl_cmd);
+	return 0;
+}
+
+/*
+ * Completion callback installed in ioctx.pt_complete by io_pt_ioctl(): marks
+ * the link chain failed when @ret is negative, then posts @ret as the result.
+ * NOTE(review): has external linkage; could likely be static - confirm that
+ * nothing outside this file calls it by name.
+ */
+void pt_complete(struct pt_ioctl_ctx *ptioc, long ret)
+{
+	struct io_pt_ioctl *pt = container_of(ptioc, struct io_pt_ioctl, ioctx);
+	struct io_kiocb *req = container_of(pt, struct io_kiocb, ptioctl);
+
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_req_complete(req, ret);
+}
+
+/*
+ * Issue IORING_OP_IOCTL_PT via the driver's async_ioctl(). Always returns 0;
+ * the result is posted here, or later from pt_complete() on -EIOCBQUEUED.
+ */
+static int io_pt_ioctl(struct io_kiocb *req, bool force_nonblock)
+{
+	struct io_pt_ioctl *pt = &req->ptioctl;
+	struct block_device *bdev = I_BDEV(req->file->f_mapping->host);
+	long ret;
+
+	/* set up callback for async */
+	pt->ioctx.pt_complete = pt_complete;
+	ret = bdev->bd_disk->fops->async_ioctl(bdev, req->file->f_mode, pt->cmd,
+					       pt->arg, &pt->ioctx);
+	if (ret == -EIOCBQUEUED) /* async completion */
+		return 0;
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
static int io_renameat_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -6078,6 +6150,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_renameat_prep(req, sqe);
case IORING_OP_UNLINKAT:
return io_unlinkat_prep(req, sqe);
+ case IORING_OP_IOCTL_PT:
+ return io_pt_ioctl_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6337,6 +6411,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
case IORING_OP_UNLINKAT:
ret = io_unlinkat(req, force_nonblock);
break;
+ case IORING_OP_IOCTL_PT:
+ ret = io_pt_ioctl(req, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..60671e2b00ba 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -22,12 +22,16 @@ struct io_uring_sqe {
union {
__u64 off; /* offset into file */
__u64 addr2;
+ __u64 ioctl_arg;
};
union {
__u64 addr; /* pointer to buffer or iovecs */
__u64 splice_off_in;
};
- __u32 len; /* buffer size or number of iovecs */
+ union {
+ __u32 len; /* buffer size or number of iovecs */
+ __u32 ioctl_cmd;
+ };
union {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
@@ -137,6 +141,7 @@ enum {
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
+ IORING_OP_IOCTL_PT,
/* this goes last, obviously */
IORING_OP_LAST,
--
2.25.1
More information about the Linux-nvme
mailing list