[RFC PATCH 4/4] io_uring: add async passthrough ioctl support

Kanchan Joshi joshi.k at samsung.com
Wed Jan 27 10:00:29 EST 2021


Introduce IORING_OP_IOCTL_PT for async ioctl. It bypasses the block
layer and reaches the underlying block driver managing the block device
directly. This is done by calling the newly introduced "async_ioctl"
block-device operation.
The requested operation may complete synchronously, in which case the
CQE is posted immediately. For asynchronous completion, the lower layer
invokes the completion callback supplied by io_uring.

Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g at samsung.com>
---
 fs/io_uring.c                 | 77 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  7 +++-
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 985a9e3f976d..c15852dfb727 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -468,6 +468,19 @@ struct io_rw {
 	u64				len;
 };
 
+/*
+ * Per-request state for IORING_OP_IOCTL_PT. The passthru ioctl skips the
+ * block layer and reaches the block device driver via the async_ioctl()
+ * block-dev operation.
+ */
+struct io_pt_ioctl {
+	struct file			*file;
+	/* arg and cmd like regular ioctl */
+	u64				arg;
+	u32				cmd;
+	/* defined by block layer; carries the submitter task and the
+	 * completion callback used for async completion (see pt_complete) */
+	struct pt_ioctl_ctx		ioctx;
+};
+
 struct io_connect {
 	struct file			*file;
 	struct sockaddr __user		*addr;
@@ -699,6 +712,7 @@ struct io_kiocb {
 		struct io_shutdown	shutdown;
 		struct io_rename	rename;
 		struct io_unlink	unlink;
+		struct io_pt_ioctl	ptioctl;
 		/* use only after cleaning per-op data, see io_clean_op() */
 		struct io_completion	compl;
 	};
@@ -824,6 +838,10 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file		= 1,
 		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_IOCTL_PT] = {
+		.needs_file		= 1,
+		.work_flags		= IO_WQ_WORK_MM,
+	},
 	[IORING_OP_READ_FIXED] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
@@ -3704,6 +3722,60 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 	return ret;
 }
 
+/*
+ * Prepare an IORING_OP_IOCTL_PT request: validate that the target file is
+ * a block device whose driver implements async_ioctl(), and stash cmd/arg
+ * plus the task the driver should charge the work to.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the fd is not a block device or the
+ * driver lacks async_ioctl support.
+ */
+static int io_pt_ioctl_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	unsigned int cmd = READ_ONCE(sqe->ioctl_cmd);
+	unsigned long arg = READ_ONCE(sqe->ioctl_arg);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct inode *inode = req->file->f_mapping->host;
+	struct block_device *bdev;
+	struct gendisk *disk;
+
+	/*
+	 * Only .needs_file is set for this opcode, so any fd can get here;
+	 * I_BDEV() is only valid for block-device inodes.
+	 */
+	if (!S_ISBLK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	bdev = I_BDEV(inode);
+	disk = bdev->bd_disk;
+	if (!disk || !disk->fops || !disk->fops->async_ioctl)
+		return -EOPNOTSUPP;
+
+	/* for sqpoll, the sq thread submits; account against sqo_task */
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		req->ptioctl.ioctx.task = ctx->sqo_task;
+	else
+		req->ptioctl.ioctx.task = current;
+
+	req->ptioctl.arg = arg;
+	req->ptioctl.cmd = cmd;
+	return 0;
+}
+
+/*
+ * Completion callback handed to the driver via pt_ioctl_ctx; invoked when
+ * an async_ioctl() that returned -EIOCBQUEUED finishes. Recovers the owning
+ * request from the embedded ioctx and posts its CQE.
+ *
+ * Made static: it is only referenced within this file (as a function
+ * pointer set in io_pt_ioctl), and fs/io_uring.c keeps all helpers
+ * file-local.
+ */
+static void pt_complete(struct pt_ioctl_ctx *ptioc, long ret)
+{
+	struct io_kiocb *req = container_of(ptioc, struct io_kiocb, ptioctl.ioctx);
+
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_req_complete(req, ret);
+}
+
+/*
+ * Issue an IORING_OP_IOCTL_PT request by calling the driver's
+ * async_ioctl(). Two completion modes:
+ *  - driver returns -EIOCBQUEUED: completion happens later via
+ *    pt_complete(), nothing more to do here;
+ *  - any other return: the operation finished synchronously and the CQE
+ *    is posted immediately.
+ * prep already verified disk->fops->async_ioctl is non-NULL.
+ */
+static int io_pt_ioctl(struct io_kiocb *req, bool force_nonblock)
+{
+	struct block_device *bdev = I_BDEV(req->file->f_mapping->host);
+	struct gendisk *disk = bdev->bd_disk;
+	fmode_t mode = req->file->f_mode;
+	long ret;
+
+	/* set up callback for async completion */
+	req->ptioctl.ioctx.pt_complete = pt_complete;
+
+	ret = disk->fops->async_ioctl(bdev, mode, req->ptioctl.cmd,
+				req->ptioctl.arg, &req->ptioctl.ioctx);
+	if (ret == -EIOCBQUEUED) /* async completion */
+		return 0;
+	if (ret < 0)
+		req_set_fail_links(req);
+
+	io_req_complete(req, ret);
+	return 0;
+}
+
 static int io_renameat_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
@@ -6078,6 +6150,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_renameat_prep(req, sqe);
 	case IORING_OP_UNLINKAT:
 		return io_unlinkat_prep(req, sqe);
+	case IORING_OP_IOCTL_PT:
+		return io_pt_ioctl_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6337,6 +6411,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_UNLINKAT:
 		ret = io_unlinkat(req, force_nonblock);
 		break;
+	case IORING_OP_IOCTL_PT:
+		ret = io_pt_ioctl(req, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..60671e2b00ba 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -22,12 +22,16 @@ struct io_uring_sqe {
 	union {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
+		__u64	ioctl_arg;
 	};
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
 		__u64	splice_off_in;
 	};
-	__u32	len;		/* buffer size or number of iovecs */
+	union {
+		__u32	len;	/* buffer size or number of iovecs */
+		__u32	ioctl_cmd;
+	};
 	union {
 		__kernel_rwf_t	rw_flags;
 		__u32		fsync_flags;
@@ -137,6 +141,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_IOCTL_PT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.25.1




More information about the Linux-nvme mailing list