[PATCH] MTD: Retry Read/Write Transfer Buffer Allocations

Grant Erickson marathon96 at gmail.com
Mon Apr 4 14:39:49 EDT 2011


As promised, this is the alternate, work-in-progress implementation of
this patch using the more complex get_user_pages and iovecs to perform
a user-requested read or write operation, reusing the buffer passed
from user-space.

As implemented below, reads work 100% of the time for the few test
cases I've thrown at it (fw_printenv and nanddump). However, writes
will not work in this implementation unless the size and offset meet
the alignment requirements stipulated by nand_do_write_ops. Addressing
this will require deblocking the head and tail of the transfer.

I see this as a follow on, interative approach to the simpler,
short-term patch submitted by Jarkko and myself. Hopefully, we can
upstream that patch now and follow up with this later when complete.

Comments and tweaks welcomed.

Best,

Grant

---
 drivers/mtd/mtdchar.c |  342 ++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 265 insertions(+), 77 deletions(-)

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 145b3d0d..cd33128 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -51,6 +51,18 @@ struct mtd_file_info {
 	enum mtd_file_modes mode;
 };
 
+/*
+ * Data structure to handle scatter/gather transfer pages and
+ * page extents for user-space read and write requests.
+ */
+struct mtd_xio {
+	bool write;
+	struct page **pages;
+	unsigned long npages;
+	struct iovec *iovecs;
+	unsigned long niovecs;
+};
+
 static loff_t mtd_lseek (struct file *file, loff_t offset, int orig)
 {
 	struct mtd_file_info *mfi = file->private_data;
@@ -166,60 +178,170 @@ static int mtd_close(struct inode *inode, struct file *file)
 	return 0;
 } /* mtd_close */
 
-/* FIXME: This _really_ needs to die. In 2.5, we should lock the
-   userspace buffer down and use it directly with readv/writev.
-*/
-#define MAX_KMALLOC_SIZE 0x20000
+static inline bool pages_are_adjacent(struct page *first, struct page *second)
+{
+    return ((page_address(first) + PAGE_SIZE) == page_address(second));
+}
 
-static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t *ppos)
+static void pages_to_iovecs(size_t len, unsigned long off, struct page **pages, unsigned long npages, struct iovec *iovecs, unsigned long *niovecsp)
 {
-	struct mtd_file_info *mfi = file->private_data;
-	struct mtd_info *mtd = mfi->mtd;
-	size_t retlen=0;
-	size_t total_retlen=0;
-	int ret=0;
-	int len;
-	char *kbuf;
+	unsigned long iovec_len = 0;
+	size_t head_len, tail_len;
+	struct page *last_page, *curr_page;
+	void *address;
+	unsigned long p, v;
 
-	DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n");
+	/* Handle the head. */
 
-	if (*ppos + count > mtd->size)
-		count = mtd->size - *ppos;
+	head_len = min_t(size_t, len, PAGE_SIZE - off);
+	tail_len = (len - head_len) % PAGE_SIZE;
 
-	if (!count)
-		return 0;
+	iovecs[0].iov_base = page_address(pages[0]) + off;
+	iovecs[0].iov_len  = head_len;
 
-	/* FIXME: Use kiovec in 2.5 to lock down the user's buffers
-	   and pass them directly to the MTD functions */
+	/* Handle the body + tail. */
 
-	if (count > MAX_KMALLOC_SIZE)
-		kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
-	else
-		kbuf=kmalloc(count, GFP_KERNEL);
+	for (v = 0, p = 1; p < npages; p++) {
+		last_page = pages[p - 1];
+		curr_page = pages[p];
+		address = page_address(curr_page);
 
-	if (!kbuf)
-		return -ENOMEM;
+		if (!pages_are_adjacent(last_page, curr_page)) {
+		    iovecs[++v].iov_base = address;
+		}
+
+		iovecs[v].iov_len += PAGE_SIZE;
+	}
+
+	/* Fix-up the tail length. */
+
+	iovecs[v].iov_len -= (PAGE_SIZE - tail_len);
+
+	*niovecsp = v + 1;
+
+	for (v = 0; v < *niovecsp; v++) {
+		iovec_len += iovecs[v].iov_len;
+	}
+}
+
+static void put_pages(struct page **pages, unsigned long npages, bool write)
+{
+	unsigned long p;
+	struct page *page;
+
+	for (p = 0; p < npages; p++) {
+		page = pages[p];
+		if (write)
+			set_page_dirty_lock(page);
+		put_page(page);
+	}
+}
+
+static int mtd_get_user_pages(struct mtd_xio *mxio, void __user *base, size_t len, bool write)
+{
+	unsigned long ustart, off, npages, uend, niovecs;
+	struct page **pages;
+	struct iovec *iovecs;
+	int status;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, base, len))) {
+		status = -EFAULT;
+		goto done;
+	}
+
+	ustart = (unsigned long)base & PAGE_MASK;
+	off = (unsigned long)base & ~PAGE_MASK;
+	npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	uend = (unsigned long)(base + len);
+
+	niovecs = npages;
+
+	pages = kmalloc(npages * sizeof (struct page *), GFP_KERNEL);
+	if (!pages) {
+		status = -ENOMEM;
+		goto done;
+	}
+
+	iovecs = kzalloc(niovecs * sizeof (struct iovec), GFP_KERNEL);
+	if (!iovecs) {
+		status = -ENOMEM;
+		goto done_free_pages;
+	}
+
+	status = get_user_pages_fast(ustart, npages, write, pages);
+	if (status < 0)
+		goto done_free_iovecs;
+
+	if (status != npages) {
+		npages = status;
+		status = -EFAULT;
+		goto done_pages;
+	}
+
+	pages_to_iovecs(len, off, pages, npages, iovecs, &niovecs);
+
+	mxio->write	= write;
+	mxio->pages	= pages;
+	mxio->npages	= npages;
+	mxio->iovecs	= iovecs;
+	mxio->niovecs	= niovecs;
+
+	return status;
+
+ done_pages:
+	put_pages(pages, npages, write);
+
+ done_free_iovecs:
+	kfree(iovecs);
+
+ done_free_pages:
+	kfree(pages);
+
+ done:
+	return status;
+}
+
+static void mtd_put_user_pages(struct mtd_xio *mxio)
+{
+	if (!mxio)
+		return;
+
+	if (mxio->npages && mxio->pages) {
+		put_pages(mxio->pages, mxio->npages, mxio->write);
+		kfree(mxio->pages);
+	}
+
+	if (mxio->niovecs && mxio->iovecs) {
+		kfree(mxio->iovecs);
+	}
+}
+
+static ssize_t mtd_do_read(struct file *file, char __kernel *buf, size_t count, loff_t *ppos)
+{
+	struct mtd_file_info *mfi = file->private_data;
+	struct mtd_info *mtd = mfi->mtd;
+	size_t retlen=0;
+	size_t total_retlen=0;
+	int ret=0;
+	int len;
 
 	while (count) {
 
-		if (count > MAX_KMALLOC_SIZE)
-			len = MAX_KMALLOC_SIZE;
-		else
-			len = count;
+		len = count;
 
 		switch (mfi->mode) {
 		case MTD_MODE_OTP_FACTORY:
-			ret = mtd->read_fact_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd->read_fact_prot_reg(mtd, *ppos, len, &retlen, buf);
 			break;
 		case MTD_MODE_OTP_USER:
-			ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, buf);
 			break;
 		case MTD_MODE_RAW:
 		{
 			struct mtd_oob_ops ops;
 
 			ops.mode = MTD_OOB_RAW;
-			ops.datbuf = kbuf;
+			ops.datbuf = buf;
 			ops.oobbuf = NULL;
 			ops.len = len;
 
@@ -228,7 +350,7 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
 			break;
 		}
 		default:
-			ret = mtd->read(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd->read(mtd, *ppos, len, &retlen, buf);
 		}
 		/* Nand returns -EBADMSG on ecc errors, but it returns
 		 * the data. For our userspace tools it is important
@@ -241,12 +363,7 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
 		 */
 		if (!ret || (ret == -EUCLEAN) || (ret == -EBADMSG)) {
 			*ppos += retlen;
-			if (copy_to_user(buf, kbuf, retlen)) {
-				kfree(kbuf);
-				return -EFAULT;
-			}
-			else
-				total_retlen += retlen;
+			total_retlen += retlen;
 
 			count -= retlen;
 			buf += retlen;
@@ -254,56 +371,26 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
 				count = 0;
 		}
 		else {
-			kfree(kbuf);
 			return ret;
 		}
 
 	}
 
-	kfree(kbuf);
 	return total_retlen;
-} /* mtd_read */
+}
 
-static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count,loff_t *ppos)
+static ssize_t mtd_do_write(struct file *file, const char __kernel *buf, size_t count, loff_t *ppos)
 {
 	struct mtd_file_info *mfi = file->private_data;
 	struct mtd_info *mtd = mfi->mtd;
-	char *kbuf;
 	size_t retlen;
 	size_t total_retlen=0;
 	int ret=0;
 	int len;
 
-	DEBUG(MTD_DEBUG_LEVEL0,"MTD_write\n");
-
-	if (*ppos == mtd->size)
-		return -ENOSPC;
-
-	if (*ppos + count > mtd->size)
-		count = mtd->size - *ppos;
-
-	if (!count)
-		return 0;
-
-	if (count > MAX_KMALLOC_SIZE)
-		kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
-	else
-		kbuf=kmalloc(count, GFP_KERNEL);
-
-	if (!kbuf)
-		return -ENOMEM;
-
 	while (count) {
 
-		if (count > MAX_KMALLOC_SIZE)
-			len = MAX_KMALLOC_SIZE;
-		else
-			len = count;
-
-		if (copy_from_user(kbuf, buf, len)) {
-			kfree(kbuf);
-			return -EFAULT;
-		}
+	    	len = count;
 
 		switch (mfi->mode) {
 		case MTD_MODE_OTP_FACTORY:
@@ -314,7 +401,7 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
 				ret = -EOPNOTSUPP;
 				break;
 			}
-			ret = mtd->write_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd->write_user_prot_reg(mtd, *ppos, len, &retlen, (char __kernel *)buf);
 			break;
 
 		case MTD_MODE_RAW:
@@ -322,7 +409,7 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
 			struct mtd_oob_ops ops;
 
 			ops.mode = MTD_OOB_RAW;
-			ops.datbuf = kbuf;
+			ops.datbuf = (char __kernel *)buf;
 			ops.oobbuf = NULL;
 			ops.len = len;
 
@@ -332,7 +419,7 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
 		}
 
 		default:
-			ret = (*(mtd->write))(mtd, *ppos, len, &retlen, kbuf);
+			ret = (*(mtd->write))(mtd, *ppos, len, &retlen, buf);
 		}
 		if (!ret) {
 			*ppos += retlen;
@@ -341,13 +428,114 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
 			buf += retlen;
 		}
 		else {
-			kfree(kbuf);
 			return ret;
 		}
 	}
 
-	kfree(kbuf);
 	return total_retlen;
+}
+
+typedef ssize_t (*mtd_io_fn_t)(struct file *, char *, size_t, loff_t *);
+
+static ssize_t mtd_do_loop_readv_writev(struct file *file, const struct iovec *iov, unsigned long iovcnt, loff_t *ppos, mtd_io_fn_t fn)
+{
+	const struct iovec *vector = iov;
+	ssize_t ret = 0;
+
+	while (iovcnt > 0) {
+		void *base;
+		size_t len;
+		ssize_t nr;
+
+		base = vector->iov_base;
+		len = vector->iov_len;
+		vector++;
+		iovcnt--;
+
+		nr = fn(file, base, len, ppos);
+
+		if (nr < 0) {
+			if (!ret)
+				ret = nr;
+			break;
+		}
+		ret += nr;
+		if (nr != len)
+			break;
+	}
+
+	return ret;
+}
+
+static ssize_t mtd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct mtd_file_info *mfi = file->private_data;
+	struct mtd_info *mtd = mfi->mtd;
+	ssize_t ret;
+	size_t total_retlen=0;
+	struct mtd_xio mxio;
+
+	DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n");
+
+	if (*ppos + count > mtd->size)
+		count = mtd->size - *ppos;
+
+	if (!count)
+		return 0;
+
+	ret = mtd_get_user_pages(&mxio, buf, count, false);
+	if (ret < 0)
+	    goto done_put_pages;
+
+	total_retlen = mtd_do_loop_readv_writev(file,
+						mxio.iovecs,
+						mxio.niovecs,
+						ppos,
+						mtd_do_read);
+
+	ret = total_retlen;
+
+ done_put_pages:
+	mtd_put_user_pages(&mxio);
+
+	return ret;
+} /* mtd_read */
+
+static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct mtd_file_info *mfi = file->private_data;
+	struct mtd_info *mtd = mfi->mtd;
+	ssize_t ret;
+	size_t total_retlen=0;
+	struct mtd_xio mxio;
+
+	DEBUG(MTD_DEBUG_LEVEL0,"MTD_write\n");
+
+	if (*ppos == mtd->size)
+		return -ENOSPC;
+
+	if (*ppos + count > mtd->size)
+		count = mtd->size - *ppos;
+
+	if (!count)
+		return 0;
+
+	ret = mtd_get_user_pages(&mxio, (void __user *)buf, count, true);
+	if (ret < 0)
+	    goto done_put_pages;
+
+	total_retlen = mtd_do_loop_readv_writev(file,
+						mxio.iovecs,
+						mxio.niovecs,
+						ppos,
+						(mtd_io_fn_t)mtd_do_write);
+
+	ret = total_retlen;
+
+ done_put_pages:
+	mtd_put_user_pages(&mxio);
+
+	return ret;
 } /* mtd_write */
 
 /*======================================================================



More information about the linux-mtd mailing list