[PATCH v4 14/25] rmap: Add support for PUD sized mappings to rmap

Alistair Popple apopple at nvidia.com
Wed Dec 18 14:55:35 PST 2024


On Tue, Dec 17, 2024 at 11:27:13PM +0100, David Hildenbrand wrote:
> On 17.12.24 06:12, Alistair Popple wrote:
> > The rmap doesn't currently support adding a PUD mapping of a
> > folio. This patch adds support for entire PUD mappings of folios,
> > primarily to allow for more standard refcounting of device DAX
> > folios. Currently DAX is the only user of this and it doesn't require
> > support for partially mapped PUD-sized folios so we don't support for
> > that for now.
> > 
> > Signed-off-by: Alistair Popple <apopple at nvidia.com>
> > 
> > ---
> > 
> > David - Thanks for your previous comments, I'm less familiar with the
> > rmap code so I would appreciate you taking another look. In particular
> > I haven't added a stat for PUD mapped folios as it seemed like
> > overkill for just the device DAX case but let me know if you think
> > otherwise.
> > 
> > Changes for v4:
> > 
> >   - New for v4, split out rmap changes as suggested by David.
> > ---
> >   include/linux/rmap.h | 15 ++++++++++++-
> >   mm/rmap.c            | 56 +++++++++++++++++++++++++++++++++++++++++++++-
> >   2 files changed, 71 insertions(+)
> > 
> > diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> > index 683a040..7043914 100644
> > --- a/include/linux/rmap.h
> > +++ b/include/linux/rmap.h
> > @@ -192,6 +192,7 @@ typedef int __bitwise rmap_t;
> >   enum rmap_level {
> >   	RMAP_LEVEL_PTE = 0,
> >   	RMAP_LEVEL_PMD,
> > +	RMAP_LEVEL_PUD,
> >   };
> >   static inline void __folio_rmap_sanity_checks(const struct folio *folio,
> > @@ -228,6 +229,14 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio,
> >   		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
> >   		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
> >   		break;
> > +	case RMAP_LEVEL_PUD:
> > +		/*
> > +		 * Assume that we are creating * a single "entire" mapping of the
> > +		 * folio.
> > +		 */
> > +		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio);
> > +		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio);
> > +		break;
> >   	default:
> >   		VM_WARN_ON_ONCE(true);
> >   	}
> > @@ -251,12 +260,16 @@ void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
> >   	folio_add_file_rmap_ptes(folio, page, 1, vma)
> >   void folio_add_file_rmap_pmd(struct folio *, struct page *,
> >   		struct vm_area_struct *);
> > +void folio_add_file_rmap_pud(struct folio *, struct page *,
> > +		struct vm_area_struct *);
> >   void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
> >   		struct vm_area_struct *);
> >   #define folio_remove_rmap_pte(folio, page, vma) \
> >   	folio_remove_rmap_ptes(folio, page, 1, vma)
> >   void folio_remove_rmap_pmd(struct folio *, struct page *,
> >   		struct vm_area_struct *);
> > +void folio_remove_rmap_pud(struct folio *, struct page *,
> > +		struct vm_area_struct *);
> >   void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
> >   		unsigned long address, rmap_t flags);
> > @@ -341,6 +354,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
> >   		atomic_add(orig_nr_pages, &folio->_large_mapcount);
> >   		break;
> >   	case RMAP_LEVEL_PMD:
> > +	case RMAP_LEVEL_PUD:
> >   		atomic_inc(&folio->_entire_mapcount);
> >   		atomic_inc(&folio->_large_mapcount);
> >   		break;
> > @@ -437,6 +451,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
> >   		atomic_add(orig_nr_pages, &folio->_large_mapcount);
> >   		break;
> >   	case RMAP_LEVEL_PMD:
> > +	case RMAP_LEVEL_PUD:
> >   		if (PageAnonExclusive(page)) {
> >   			if (unlikely(maybe_pinned))
> >   				return -EBUSY;
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index c6c4d4e..39d0439 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1203,6 +1203,11 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
> >   		}
> >   		atomic_inc(&folio->_large_mapcount);
> >   		break;
> > +	case RMAP_LEVEL_PUD:
> > +		/* We only support entire mappings of PUD sized folios in rmap */
> > +		atomic_inc(&folio->_entire_mapcount);
> > +		atomic_inc(&folio->_large_mapcount);
> > +		break;
> 
> 
> This way you don't account the pages at all as mapped, whereby PTE-mapping it
> would? And IIRC, these PUD-sized pages can be either mapped using PTEs or
> using a single PUD.

Oh good point. I was thinking that because we don't account PUD mappings today
that it would be fine to ignore them. But of course this series means we start
accounting them if mapped with PTEs so agree we should be consistent.
 
> I suspect what you want is to

Yes, I think so. Thanks for the hint. I will be out over the Christmas break but
will do a respin to incorporate this before then.

> diff --git a/mm/rmap.c b/mm/rmap.c
> index c6c4d4ea29a7e..1477028d3a176 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1187,12 +1187,19 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
>                 atomic_add(orig_nr_pages, &folio->_large_mapcount);
>                 break;
>         case RMAP_LEVEL_PMD:
> +       case RMAP_LEVEL_PUD:
>                 first = atomic_inc_and_test(&folio->_entire_mapcount);
>                 if (first) {
>                         nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped);
>                         if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) {
> -                               *nr_pmdmapped = folio_nr_pages(folio);
> -                               nr = *nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
> +                               nr_pages = folio_nr_pages(folio);
> +                               /*
> +                                * We only track PMD mappings of PMD-sized
> +                                * folios separately.
> +                                */
> +                               if (level == RMAP_LEVEL_PMD)
> +                                       *nr_pmdmapped = nr_pages;
> +                               nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
>                                 /* Raced ahead of a remove and another add? */
>                                 if (unlikely(nr < 0))
>                                         nr = 0;
> 
> Similar on the removal path.
> 
> 
> -- 
> Cheers,
> 
> David / dhildenb
> 



More information about the linux-arm-kernel mailing list