[PATCH 1/5] PMFS: Add Persistent Memory File System
Ross Zwisler
ross.zwisler at linux.intel.com
Mon Apr 29 17:47:38 EDT 2013
> +e) We ran out of bits in vmaâs vm_flags field, so we reused a flag that is
> +guaranteed not to be used on x86_64.
In README.md, non-ASCII single quote in "vma's". This was probably a
copy-and-paste error from me.
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 70c0f3d..b94d591 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -28,6 +28,8 @@ config X86
> select HAVE_OPROFILE
> select HAVE_PCSPKR_PLATFORM
> select HAVE_PERF_EVENTS
> + select HAVE_IRQ_WORK
I don't think you probably meant to add this define? HAVE_IRQ_WORK has been
removed from the kernel as of 3.9, I believe.
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 90d8cc9..3be22d8 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -281,7 +281,22 @@ void * __init extend_brk(size_t size, size_t align)
> return ret;
> }
>
> -#ifdef CONFIG_X86_32
> +#ifdef CONFIG_X86_64
> +static void __init init_gbpages(void)
> +{
> + if (direct_gbpages && cpu_has_gbpages)
> + printk(KERN_INFO "Using GB pages for direct mapping\n");
> + else
> + {
> + printk(KERN_INFO "direct_gbpages(%d). cpu_has_gbpages(%d)."
> + "GB pages not supported.\n", direct_gbpages, cpu_has_gbpages);
> + direct_gbpages = 0;
> + }
> +}
> +#else
> +static inline void init_gbpages(void)
> +{
> +}
The define for init_gbpages has been moved to arch/x86/mm/init.c. We made a
small change to this function previously - we should make that same change in
arch/x86/mm/init.c instead of duplicating the modified code here.
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index e19ff30..f7a1aa7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -84,6 +84,7 @@ extern unsigned int kobjsize(const void *objp);
> #define VM_MAYSHARE 0x00000080
>
> #define VM_GROWSDOWN 0x00000100 /* general info on the segment */
> +#define VM_XIP_HUGETLB 0x00000200
Yay, a place for our flag!
> #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
> #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
>
> @@ -96,6 +97,7 @@ extern unsigned int kobjsize(const void *objp);
>
> #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
> #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
> +#define VM_PFN_AT_MMAP 0x00080000 /* PFNMAP vma that is fully mapped at mmap time */
Did you mean to add in this define? It isn't used anywhere?
> #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
> #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
> #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
> @@ -165,6 +167,11 @@ extern pgprot_t protection_map[16];
> #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
> #define FAULT_FLAG_TRIED 0x40 /* second try */
>
> +static inline int is_xip_hugetlb_mapping(struct vm_area_struct *vma)
> +{
> + return !!(vma->vm_flags & VM_XIP_HUGETLB);
> +}
> +
> /*
> * vm_fault is filled by the the pagefault handler and passed to the vma's
> * ->fault function. The vma's ->fault is responsible for returning a bitmask
> @@ -1010,6 +1017,14 @@ static inline int fixup_user_fault(struct task_struct *tsk,
> }
> #endif
>
> +extern pte_t *pte_alloc_pagesz(struct mm_struct *mm, unsigned long addr,
> + unsigned long sz);
> +extern pte_t *pte_offset_pagesz(struct mm_struct *mm, unsigned long addr,
> + unsigned long *sz);
> +extern void unmap_xip_hugetlb_range(struct vm_area_struct *vma,
> + unsigned long start, unsigned long end);
> +
> +
Extra line of whitespace.
> diff --git a/mm/memory.c b/mm/memory.c
> index 13cbc42..5a8bd23 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1054,6 +1054,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> return 0;
> }
>
> + /* FIXME: For now, don't copy ptes and let it fault. */
> + if (is_xip_hugetlb_mapping(vma))
> + return 0;
> +
> if (is_vm_hugetlb_page(vma))
> return copy_hugetlb_page_range(dst_mm, src_mm, vma);
>
> @@ -1353,6 +1357,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
> __unmap_hugepage_range_final(tlb, vma, start, end, NULL);
> mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
> }
> + } else if (is_xip_hugetlb_mapping(vma)) {
> + unmap_xip_hugetlb_range(vma, start, end);
> + start = end;
Spacing for these two lines is off.
> +int make_pages_present(unsigned long addr, unsigned long end)
> +{
> + int ret, len, write;
> + struct vm_area_struct * vma;
> +
> + vma = find_vma(current->mm, addr);
> + if (!vma)
> + return -ENOMEM;
> + /*
> + * We want to touch writable mappings with a write fault in order
> + * to break COW, except for shared mappings because these don't COW
> + * and we would not want to dirty them for nothing.
> + */
> + write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
> + BUG_ON(addr >= end);
> + BUG_ON(end > vma->vm_end);
> + len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
> + ret = get_user_pages(current, current->mm, addr,
> + len, write, 0, NULL, NULL);
> + if (ret < 0)
> + return ret;
> + return ret == len ? 0 : -EFAULT;
> +}
> +
We didn't add the function 'make_pages_present', and it isn't used in the new
3.9 code at all - I'm guessing it got copied by accident?
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 79b7cf7..f92ef16 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -173,6 +173,11 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma,
> VM_BUG_ON(end > vma->vm_end);
> VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
>
> + if (is_xip_hugetlb_mapping(vma)) {
> + vma->vm_flags &= ~VM_LOCKED;
> + return nr_pages;;
Double semicolon.
More information about the Linux-pmfs
mailing list