[RFC] early_fault handler
Matthew Wilcox
willy at linux.intel.com
Wed Nov 13 08:47:07 EST 2013
Looking to minimise the changes to the VM, here's a small piece of
the puzzle.
Instead of adding a new special case at the start of handle_mm_fault,
introduce a new vm_op, the ->early_fault method. Convert both hugetlb
and the XIP hpage code to use it. Patch compile-tested only.
(Mostly the patch is so big because I have to change the calling
convention of either hugetlb or hpage_xip. I chose the latter for
obvious reasons).
---
fs/pmfs/xip.c | 36 +++++++++++++++++++-----------------
include/linux/mm.h | 2 ++
mm/hugetlb.c | 1 +
mm/memory.c | 27 ++++++---------------------
4 files changed, 28 insertions(+), 38 deletions(-)
diff --git a/fs/pmfs/xip.c b/fs/pmfs/xip.c
index c7471be..f6ee792 100644
--- a/fs/pmfs/xip.c
+++ b/fs/pmfs/xip.c
@@ -551,29 +551,31 @@ static pte_t pmfs_make_huge_pte(struct vm_area_struct *vma,
return entry;
}
-static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int __pmfs_xip_file_hpage_fault(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long user_address, unsigned int flags)
{
int ret;
pte_t *ptep, new_pte;
unsigned long size, block_sz;
- struct mm_struct *mm = vma->vm_mm;
struct inode *inode = vma->vm_file->f_mapping->host;
- unsigned long address = (unsigned long)vmf->virtual_address;
+ unsigned long address = user_address & PAGE_MASK;
+ unsigned long pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +
+ vma->vm_pgoff;
static DEFINE_MUTEX(pmfs_instantiation_mutex);
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (vmf->pgoff >= size) {
+ if (pgoff >= size) {
pmfs_dbg("[%s:%d] pgoff >= size(SIGBUS). vm_start(0x%lx),"
" vm_end(0x%lx), pgoff(0x%lx), VA(%lx)\n",
__func__, __LINE__, vma->vm_start, vma->vm_end,
- vmf->pgoff, (unsigned long)vmf->virtual_address);
+ pgoff, address);
return VM_FAULT_SIGBUS;
}
- block_sz = pmfs_data_block_size(vma, address, vmf->pgoff);
+ block_sz = pmfs_data_block_size(vma, address, pgoff);
address &= ~(block_sz - 1);
BUG_ON(block_sz == PAGE_SIZE);
pmfs_dbg_mmapvv("[%s:%d] BlockSz : %lx",
@@ -584,7 +586,7 @@ static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
pmfs_dbg("[%s:%d] pmfs_pte_alloc failed(OOM). vm_start(0x%lx),"
" vm_end(0x%lx), pgoff(0x%lx), VA(%lx)\n",
__func__, __LINE__, vma->vm_start, vma->vm_end,
- vmf->pgoff, (unsigned long)vmf->virtual_address);
+ pgoff, address);
return VM_FAULT_SIGBUS;
}
@@ -596,13 +598,12 @@ static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
if (pte_none(*ptep)) {
void *xip_mem;
unsigned long xip_pfn;
- if (pmfs_get_xip_mem(vma->vm_file->f_mapping, vmf->pgoff, 1,
+ if (pmfs_get_xip_mem(vma->vm_file->f_mapping, pgoff, 1,
&xip_mem, &xip_pfn) != 0) {
pmfs_dbg("[%s:%d] get_xip_mem failed(OOM). vm_start(0x"
"%lx), vm_end(0x%lx), pgoff(0x%lx), VA(%lx)\n",
__func__, __LINE__, vma->vm_start,
- vma->vm_end, vmf->pgoff,
- (unsigned long)vmf->virtual_address);
+ vma->vm_end, pgoff, address);
ret = VM_FAULT_SIGBUS;
goto out_mutex;
}
@@ -620,7 +621,7 @@ static int __pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
spin_unlock(&mm->page_table_lock);
if (ptep_set_access_flags(vma, address, ptep, new_pte,
- vmf->flags & FAULT_FLAG_WRITE))
+ flags & FAULT_FLAG_WRITE))
update_mmu_cache(vma, address, ptep);
}
ret = VM_FAULT_NOPAGE;
@@ -630,13 +631,14 @@ out_mutex:
return ret;
}
-static int pmfs_xip_file_hpage_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int pmfs_xip_file_hpage_fault(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
{
- int ret = 0;
+ int ret;
rcu_read_lock();
- ret = __pmfs_xip_file_hpage_fault(vma, vmf);
+ ret = __pmfs_xip_file_hpage_fault(mm, vma, address, flags);
rcu_read_unlock();
return ret;
}
@@ -646,7 +648,7 @@ static const struct vm_operations_struct pmfs_xip_vm_ops = {
};
static const struct vm_operations_struct pmfs_xip_hpage_vm_ops = {
- .fault = pmfs_xip_file_hpage_fault,
+ .early_fault = pmfs_xip_file_hpage_fault,
};
static inline int pmfs_has_huge_mmap(struct super_block *sb)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 91b5426..8685bb1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -206,6 +206,8 @@ struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	int (*early_fault)(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b60f330..e94a827 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,6 +2269,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
const struct vm_operations_struct hugetlb_vm_ops = {
.fault = hugetlb_vm_op_fault,
+ .early_fault = hugetlb_fault,
.open = hugetlb_vm_op_open,
.close = hugetlb_vm_op_close,
};
diff --git a/mm/memory.c b/mm/memory.c
index 6e91ac7..cee48ab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3847,23 +3847,8 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
- /* FIXME : Can't find a single flag in vm_area_struct->vma_flags. */
- if (is_xip_hugetlb_mapping(vma))
- {
- int err;
- struct vm_fault vmf;
- vmf.virtual_address = (void __user *)(address & PAGE_MASK);
- vmf.pgoff = (((address & PAGE_MASK) - vma->vm_start) >> PAGE_SHIFT)
- + vma->vm_pgoff;
- vmf.flags = flags;
- vmf.page = NULL;
- err = vma->vm_ops->fault(vma, &vmf);
- if (!err || (err == VM_FAULT_NOPAGE))
- return 0;
- }
-
- if (unlikely(is_vm_hugetlb_page(vma)))
- return hugetlb_fault(mm, vma, address, flags);
+	if (vma->vm_ops && unlikely(vma->vm_ops->early_fault))
+		return vma->vm_ops->early_fault(mm, vma, address, flags);
retry:
pgd = pgd_offset(mm, address);
@@ -3998,7 +3983,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
/****************************************************************************/
/* XIP_HUGETLB support */
pte_t *pte_offset_pagesz(struct mm_struct *mm, unsigned long addr,
- unsigned long *sz)
+ unsigned long *sz)
{
pgd_t *pgd;
pud_t *pud;
@@ -4023,7 +4008,7 @@ pte_t *pte_offset_pagesz(struct mm_struct *mm, unsigned long addr,
EXPORT_SYMBOL(pte_offset_pagesz);
pte_t *pte_alloc_pagesz(struct mm_struct *mm, unsigned long addr,
- unsigned long sz)
+ unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
@@ -4046,7 +4031,7 @@ pte_t *pte_alloc_pagesz(struct mm_struct *mm, unsigned long addr,
EXPORT_SYMBOL(pte_alloc_pagesz);
static void __unmap_xip_hugetlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
@@ -4073,7 +4058,7 @@ static void __unmap_xip_hugetlb_range(struct vm_area_struct *vma,
}
void unmap_xip_hugetlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end)
{
mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
__unmap_xip_hugetlb_range(vma, start, end);
More information about the Linux-pmfs
mailing list