[PATCH 1/5] PMFS: Add Persistent Memory File System

Ross Zwisler ross.zwisler at linux.intel.com
Mon Apr 29 17:47:38 EDT 2013


> +e) We ran out of bits in vma’s vm_flags field, so we reused a flag that is
> +guaranteed not to be used on x86_64.

In README.md, non-ASCII single quote in "vma's".  This was probably a
copy-and-paste error from me.

> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 70c0f3d..b94d591 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -28,6 +28,8 @@ config X86
>  	select HAVE_OPROFILE
>  	select HAVE_PCSPKR_PLATFORM
>  	select HAVE_PERF_EVENTS
> +	select HAVE_IRQ_WORK

I don't think you probably meant to add this define?  HAVE_IRQ_WORK has been
removed from the kernel as of 3.9, I believe.

> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 90d8cc9..3be22d8 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -281,7 +281,22 @@ void * __init extend_brk(size_t size, size_t align)
>  	return ret;
>  }
>  
> -#ifdef CONFIG_X86_32
> +#ifdef CONFIG_X86_64
> +static void __init init_gbpages(void)
> +{
> +	if (direct_gbpages && cpu_has_gbpages)
> +		printk(KERN_INFO "Using GB pages for direct mapping\n");
> +	else
> +	{
> +		printk(KERN_INFO "direct_gbpages(%d). cpu_has_gbpages(%d)."
> +				"GB pages not supported.\n", direct_gbpages, cpu_has_gbpages);
> +		direct_gbpages = 0;
> +	}
> +}
> +#else
> +static inline void init_gbpages(void)
> +{
> +}

The define for init_gbpages has been moved to arch/x86/mm/init.c.  We made a
small change to this function previously - we should make that same change in
arch/x86/mm/init.c instead of duplicating the modified code here.

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index e19ff30..f7a1aa7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -84,6 +84,7 @@ extern unsigned int kobjsize(const void *objp);
>  #define VM_MAYSHARE	0x00000080
>  
>  #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
> +#define VM_XIP_HUGETLB	0x00000200

Yay, a place for our flag!

>  #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
>  #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
>  
> @@ -96,6 +97,7 @@ extern unsigned int kobjsize(const void *objp);
>  
>  #define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
>  #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
> +#define VM_PFN_AT_MMAP	0x00080000	/* PFNMAP vma that is fully mapped at mmap time */

Did you mean to add in this define?  It isn't used anywhere?

>  #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
>  #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
>  #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
> @@ -165,6 +167,11 @@ extern pgprot_t protection_map[16];
>  #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
>  #define FAULT_FLAG_TRIED	0x40	/* second try */
>  
> +static inline int is_xip_hugetlb_mapping(struct vm_area_struct *vma)
> +{
> +	return !!(vma->vm_flags & VM_XIP_HUGETLB);
> +}
> +
>  /*
>   * vm_fault is filled by the the pagefault handler and passed to the vma's
>   * ->fault function. The vma's ->fault is responsible for returning a bitmask
> @@ -1010,6 +1017,14 @@ static inline int fixup_user_fault(struct task_struct *tsk,
>  }
>  #endif
>  
> +extern pte_t *pte_alloc_pagesz(struct mm_struct *mm, unsigned long addr, 
> +														unsigned long sz);
> +extern pte_t *pte_offset_pagesz(struct mm_struct *mm, unsigned long addr, 
> +														unsigned long *sz);
> +extern void unmap_xip_hugetlb_range(struct vm_area_struct *vma,
> +									unsigned long start, unsigned long end);
> +
> +

Extra line of whitespace.

> diff --git a/mm/memory.c b/mm/memory.c
> index 13cbc42..5a8bd23 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1054,6 +1054,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  			return 0;
>  	}
>  
> +	/* FIXME: For now, don't copy ptes and let it fault. */
> +	if (is_xip_hugetlb_mapping(vma))
> +		return 0;
> +
>  	if (is_vm_hugetlb_page(vma))
>  		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
>  
> @@ -1353,6 +1357,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>  				__unmap_hugepage_range_final(tlb, vma, start, end, NULL);
>  				mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
>  			}
> +		} else if (is_xip_hugetlb_mapping(vma)) {
> +				unmap_xip_hugetlb_range(vma, start, end);
> +				start = end;

Spacing for these two lines is off.

> +int make_pages_present(unsigned long addr, unsigned long end)
> +{
> +	int ret, len, write;
> +	struct vm_area_struct * vma;
> +
> +	vma = find_vma(current->mm, addr);
> +	if (!vma)
> +		return -ENOMEM;
> +	/*
> +	 * We want to touch writable mappings with a write fault in order
> +	 * to break COW, except for shared mappings because these don't COW
> +	 * and we would not want to dirty them for nothing.
> +	 */
> +	write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
> +	BUG_ON(addr >= end);
> +	BUG_ON(end > vma->vm_end);
> +	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
> +	ret = get_user_pages(current, current->mm, addr,
> +			len, write, 0, NULL, NULL);
> +	if (ret < 0)
> +		return ret;
> +	return ret == len ? 0 : -EFAULT;
> +}
> +

We didn't add the function 'make_pages_present', and it isn't used in the new
3.9 code at all - I'm guessing it got copied by accident?

> diff --git a/mm/mlock.c b/mm/mlock.c
> index 79b7cf7..f92ef16 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -173,6 +173,11 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma,
>  	VM_BUG_ON(end   > vma->vm_end);
>  	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
>  
> +	if (is_xip_hugetlb_mapping(vma)) {
> +		vma->vm_flags &= ~VM_LOCKED;
> +		return nr_pages;;

Double semicolon.





More information about the Linux-pmfs mailing list