[RFC] early_fault handler

Matthew Wilcox <willy@linux.intel.com>
Wed Nov 13 08:47:07 EST 2013


In the interest of minimising the changes to the core VM, here's a
small piece of the puzzle.

Instead of adding a new special case at the start of handle_mm_fault(),
introduce a new vm_op, the ->early_fault method, and convert both
hugetlb and the XIP hpage code to use it.  The patch is compile-tested
only.

(The patch is mostly so large because I have to change the calling
convention of either hugetlb or hpage_xip.  I chose the latter for
obvious reasons.)
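
To illustrate the new calling convention, here's a rough sketch of what
an implementation might look like (the "foofs" names are hypothetical,
for illustration only; they're not part of the patch below):

static int foofs_early_fault(struct mm_struct *mm,
		struct vm_area_struct *vma,
		unsigned long address, unsigned int flags)
{
	/*
	 * The handler owns the entire fault: it walks and allocates
	 * its own page tables, installs the mapping, and returns a
	 * VM_FAULT_* code (or 0) that goes straight back to the arch
	 * fault handler, bypassing the generic page-table walk.
	 */
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct foofs_vm_ops = {
	.early_fault	= foofs_early_fault,
};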

---

 fs/pmfs/xip.c      |   36 +++++++++++++++++++-----------------
 include/linux/mm.h |    2 ++
 mm/hugetlb.c       |    1 +
 mm/memory.c        |   27 ++++++---------------------
 4 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/fs/pmfs/xip.c b/fs/pmfs/xip.c
index c7471be..f6ee792 100644
--- a/fs/pmfs/xip.c
+++ b/fs/pmfs/xip.c
@@ -551,29 +551,31 @@ static pte_t pmfs_make_huge_pte(struct vm_area_struct *vma,
 	return entry;
 }
 
-static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
-					struct vm_fault *vmf)
+static int __pmfs_xip_file_hpage_fault(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				unsigned long user_address, unsigned int flags)
 {
 	int ret;
 	pte_t *ptep, new_pte;
 	unsigned long size, block_sz;
-	struct mm_struct *mm = vma->vm_mm;
 	struct inode *inode = vma->vm_file->f_mapping->host;
-	unsigned long address = (unsigned long)vmf->virtual_address;
+	unsigned long address = user_address & PAGE_MASK;
+	unsigned long pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +
+				vma->vm_pgoff;
 
 	static DEFINE_MUTEX(pmfs_instantiation_mutex);
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-	if (vmf->pgoff >= size) {
+	if (pgoff >= size) {
 		pmfs_dbg("[%s:%d] pgoff >= size(SIGBUS). vm_start(0x%lx),"
 			" vm_end(0x%lx), pgoff(0x%lx), VA(%lx)\n",
 			__func__, __LINE__, vma->vm_start, vma->vm_end,
-			vmf->pgoff, (unsigned long)vmf->virtual_address);
+			pgoff, address);
 		return VM_FAULT_SIGBUS;
 	}
 
-	block_sz = pmfs_data_block_size(vma, address, vmf->pgoff);
+	block_sz = pmfs_data_block_size(vma, address, pgoff);
 	address &= ~(block_sz - 1);
 	BUG_ON(block_sz == PAGE_SIZE);
 	pmfs_dbg_mmapvv("[%s:%d] BlockSz : %lx",
@@ -584,7 +586,7 @@ static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
 		pmfs_dbg("[%s:%d] pmfs_pte_alloc failed(OOM). vm_start(0x%lx),"
 			" vm_end(0x%lx), pgoff(0x%lx), VA(%lx)\n",
 			__func__, __LINE__, vma->vm_start, vma->vm_end,
-			vmf->pgoff, (unsigned long)vmf->virtual_address);
+			pgoff, address);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -596,13 +598,12 @@ static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
 	if (pte_none(*ptep)) {
 		void *xip_mem;
 		unsigned long xip_pfn;
-		if (pmfs_get_xip_mem(vma->vm_file->f_mapping, vmf->pgoff, 1,
+		if (pmfs_get_xip_mem(vma->vm_file->f_mapping, pgoff, 1,
 				      &xip_mem, &xip_pfn) != 0) {
 			pmfs_dbg("[%s:%d] get_xip_mem failed(OOM). vm_start(0x"
 				"%lx), vm_end(0x%lx), pgoff(0x%lx), VA(%lx)\n",
 				__func__, __LINE__, vma->vm_start,
-				vma->vm_end, vmf->pgoff,
-				(unsigned long)vmf->virtual_address);
+				vma->vm_end, pgoff, address);
 			ret = VM_FAULT_SIGBUS;
 			goto out_mutex;
 		}
@@ -620,7 +621,7 @@ static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
 		spin_unlock(&mm->page_table_lock);
 
 		if (ptep_set_access_flags(vma, address, ptep, new_pte,
-					  vmf->flags & FAULT_FLAG_WRITE))
+					  flags & FAULT_FLAG_WRITE))
 			update_mmu_cache(vma, address, ptep);
 	}
 	ret = VM_FAULT_NOPAGE;
@@ -630,13 +631,14 @@ out_mutex:
 	return ret;
 }
 
-static int pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
-							struct vm_fault *vmf)
+static int pmfs_xip_file_hpage_fault(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags)
 {
-	int ret = 0;
+	int ret;
 
 	rcu_read_lock();
-	ret = __pmfs_xip_file_hpage_fault(vma, vmf);
+	ret = __pmfs_xip_file_hpage_fault(mm, vma, address, flags);
 	rcu_read_unlock();
 	return ret;
 }
@@ -646,7 +648,7 @@ static const struct vm_operations_struct pmfs_xip_vm_ops = {
 };
 
 static const struct vm_operations_struct pmfs_xip_hpage_vm_ops = {
-	.fault	= pmfs_xip_file_hpage_fault,
+	.early_fault	= pmfs_xip_file_hpage_fault,
 };
 
 static inline int pmfs_has_huge_mmap(struct super_block *sb)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 91b5426..8685bb1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -206,6 +206,8 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	int (*early_fault)(struct mm_struct *, struct vm_area_struct *,
+				unsigned long address, unsigned int flags);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b60f330..e94a827 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,6 +2269,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 const struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
+	.early_fault = hugetlb_fault,
 	.open = hugetlb_vm_op_open,
 	.close = hugetlb_vm_op_close,
 };
diff --git a/mm/memory.c b/mm/memory.c
index 6e91ac7..cee48ab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3847,23 +3847,8 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* do counter updates before entering really critical section. */
 	check_sync_rss_stat(current);
 
-	/* FIXME : Can't find a single flag in vm_area_struct->vma_flags.  */
-	if (is_xip_hugetlb_mapping(vma))
-	{
-		int err;
-		struct vm_fault vmf;
-		vmf.virtual_address = (void __user *)(address & PAGE_MASK);
-		vmf.pgoff = (((address & PAGE_MASK) - vma->vm_start) >> PAGE_SHIFT)
-															+ vma->vm_pgoff;
-		vmf.flags = flags;
-		vmf.page = NULL;
-		err = vma->vm_ops->fault(vma, &vmf);
-		if (!err || (err == VM_FAULT_NOPAGE))
-			return 0;
-	}
-
-	if (unlikely(is_vm_hugetlb_page(vma)))
-		return hugetlb_fault(mm, vma, address, flags);
 +	if (unlikely(vma->vm_ops && vma->vm_ops->early_fault))
+		return vma->vm_ops->early_fault(mm, vma, address, flags);
 
 retry:
 	pgd = pgd_offset(mm, address);
@@ -3998,7 +3983,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 /****************************************************************************/
 /* XIP_HUGETLB support */
 pte_t *pte_offset_pagesz(struct mm_struct *mm, unsigned long addr,
-													unsigned long *sz)
+							unsigned long *sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -4023,7 +4008,7 @@ pte_t *pte_offset_pagesz(struct mm_struct *mm, unsigned long addr,
 EXPORT_SYMBOL(pte_offset_pagesz);
 
 pte_t *pte_alloc_pagesz(struct mm_struct *mm, unsigned long addr, 
-													unsigned long sz)
+							unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -4046,7 +4031,7 @@ pte_t *pte_alloc_pagesz(struct mm_struct *mm, unsigned long addr,
 EXPORT_SYMBOL(pte_alloc_pagesz);
 
 static void __unmap_xip_hugetlb_range(struct vm_area_struct *vma,
-								unsigned long start, unsigned long end)
+					unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -4073,7 +4058,7 @@ static void __unmap_xip_hugetlb_range(struct vm_area_struct *vma,
 }
 
 void unmap_xip_hugetlb_range(struct vm_area_struct *vma,
-							unsigned long start, unsigned long end)
+					unsigned long start, unsigned long end)
 {
 	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
 	__unmap_xip_hugetlb_range(vma, start, end);
 	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
 }
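
Postscript: suggested kernel-doc wording for the new op (wording mine,
not included in the patch above):

	/*
	 * ->early_fault is called from handle_mm_fault() before the
	 * generic page-table walk.  The handler is responsible for
	 * the entire fault, including allocating page tables at
	 * whatever level it needs, and its return value is passed
	 * straight back to the arch fault handler.
	 */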



